Commit 02afc6267f6d55d47aba9fcafdbd1b7230d2294a

Authored by Al Viro
Parent: f52111b154

[PATCH] dup_fd() fixes, part 1

Move the sucker to fs/file.c in preparation for the rest

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

Showing 3 changed files with 131 additions and 130 deletions

fs/file.c
1 /* 1 /*
2 * linux/fs/file.c 2 * linux/fs/file.c
3 * 3 *
4 * Copyright (C) 1998-1999, Stephen Tweedie and Bill Hawes 4 * Copyright (C) 1998-1999, Stephen Tweedie and Bill Hawes
5 * 5 *
6 * Manage the dynamic fd arrays in the process files_struct. 6 * Manage the dynamic fd arrays in the process files_struct.
7 */ 7 */
8 8
9 #include <linux/fs.h> 9 #include <linux/fs.h>
10 #include <linux/mm.h> 10 #include <linux/mm.h>
11 #include <linux/time.h> 11 #include <linux/time.h>
12 #include <linux/slab.h> 12 #include <linux/slab.h>
13 #include <linux/vmalloc.h> 13 #include <linux/vmalloc.h>
14 #include <linux/file.h> 14 #include <linux/file.h>
15 #include <linux/fdtable.h> 15 #include <linux/fdtable.h>
16 #include <linux/bitops.h> 16 #include <linux/bitops.h>
17 #include <linux/interrupt.h> 17 #include <linux/interrupt.h>
18 #include <linux/spinlock.h> 18 #include <linux/spinlock.h>
19 #include <linux/rcupdate.h> 19 #include <linux/rcupdate.h>
20 #include <linux/workqueue.h> 20 #include <linux/workqueue.h>
21 21
22 struct fdtable_defer { 22 struct fdtable_defer {
23 spinlock_t lock; 23 spinlock_t lock;
24 struct work_struct wq; 24 struct work_struct wq;
25 struct fdtable *next; 25 struct fdtable *next;
26 }; 26 };
27 27
28 int sysctl_nr_open __read_mostly = 1024*1024; 28 int sysctl_nr_open __read_mostly = 1024*1024;
29 29
30 /* 30 /*
31 * We use this list to defer free fdtables that have vmalloced 31 * We use this list to defer free fdtables that have vmalloced
32 * sets/arrays. By keeping a per-cpu list, we avoid having to embed 32 * sets/arrays. By keeping a per-cpu list, we avoid having to embed
33 * the work_struct in fdtable itself which avoids a 64 byte (i386) increase in 33 * the work_struct in fdtable itself which avoids a 64 byte (i386) increase in
34 * this per-task structure. 34 * this per-task structure.
35 */ 35 */
36 static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list); 36 static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list);
37 37
38 static inline void * alloc_fdmem(unsigned int size) 38 static inline void * alloc_fdmem(unsigned int size)
39 { 39 {
40 if (size <= PAGE_SIZE) 40 if (size <= PAGE_SIZE)
41 return kmalloc(size, GFP_KERNEL); 41 return kmalloc(size, GFP_KERNEL);
42 else 42 else
43 return vmalloc(size); 43 return vmalloc(size);
44 } 44 }
45 45
46 static inline void free_fdarr(struct fdtable *fdt) 46 static inline void free_fdarr(struct fdtable *fdt)
47 { 47 {
48 if (fdt->max_fds <= (PAGE_SIZE / sizeof(struct file *))) 48 if (fdt->max_fds <= (PAGE_SIZE / sizeof(struct file *)))
49 kfree(fdt->fd); 49 kfree(fdt->fd);
50 else 50 else
51 vfree(fdt->fd); 51 vfree(fdt->fd);
52 } 52 }
53 53
54 static inline void free_fdset(struct fdtable *fdt) 54 static inline void free_fdset(struct fdtable *fdt)
55 { 55 {
56 if (fdt->max_fds <= (PAGE_SIZE * BITS_PER_BYTE / 2)) 56 if (fdt->max_fds <= (PAGE_SIZE * BITS_PER_BYTE / 2))
57 kfree(fdt->open_fds); 57 kfree(fdt->open_fds);
58 else 58 else
59 vfree(fdt->open_fds); 59 vfree(fdt->open_fds);
60 } 60 }
61 61
62 static void free_fdtable_work(struct work_struct *work) 62 static void free_fdtable_work(struct work_struct *work)
63 { 63 {
64 struct fdtable_defer *f = 64 struct fdtable_defer *f =
65 container_of(work, struct fdtable_defer, wq); 65 container_of(work, struct fdtable_defer, wq);
66 struct fdtable *fdt; 66 struct fdtable *fdt;
67 67
68 spin_lock_bh(&f->lock); 68 spin_lock_bh(&f->lock);
69 fdt = f->next; 69 fdt = f->next;
70 f->next = NULL; 70 f->next = NULL;
71 spin_unlock_bh(&f->lock); 71 spin_unlock_bh(&f->lock);
72 while(fdt) { 72 while(fdt) {
73 struct fdtable *next = fdt->next; 73 struct fdtable *next = fdt->next;
74 vfree(fdt->fd); 74 vfree(fdt->fd);
75 free_fdset(fdt); 75 free_fdset(fdt);
76 kfree(fdt); 76 kfree(fdt);
77 fdt = next; 77 fdt = next;
78 } 78 }
79 } 79 }
80 80
81 void free_fdtable_rcu(struct rcu_head *rcu) 81 void free_fdtable_rcu(struct rcu_head *rcu)
82 { 82 {
83 struct fdtable *fdt = container_of(rcu, struct fdtable, rcu); 83 struct fdtable *fdt = container_of(rcu, struct fdtable, rcu);
84 struct fdtable_defer *fddef; 84 struct fdtable_defer *fddef;
85 85
86 BUG_ON(!fdt); 86 BUG_ON(!fdt);
87 87
88 if (fdt->max_fds <= NR_OPEN_DEFAULT) { 88 if (fdt->max_fds <= NR_OPEN_DEFAULT) {
89 /* 89 /*
90 * This fdtable is embedded in the files structure and that 90 * This fdtable is embedded in the files structure and that
91 * structure itself is getting destroyed. 91 * structure itself is getting destroyed.
92 */ 92 */
93 kmem_cache_free(files_cachep, 93 kmem_cache_free(files_cachep,
94 container_of(fdt, struct files_struct, fdtab)); 94 container_of(fdt, struct files_struct, fdtab));
95 return; 95 return;
96 } 96 }
97 if (fdt->max_fds <= (PAGE_SIZE / sizeof(struct file *))) { 97 if (fdt->max_fds <= (PAGE_SIZE / sizeof(struct file *))) {
98 kfree(fdt->fd); 98 kfree(fdt->fd);
99 kfree(fdt->open_fds); 99 kfree(fdt->open_fds);
100 kfree(fdt); 100 kfree(fdt);
101 } else { 101 } else {
102 fddef = &get_cpu_var(fdtable_defer_list); 102 fddef = &get_cpu_var(fdtable_defer_list);
103 spin_lock(&fddef->lock); 103 spin_lock(&fddef->lock);
104 fdt->next = fddef->next; 104 fdt->next = fddef->next;
105 fddef->next = fdt; 105 fddef->next = fdt;
106 /* vmallocs are handled from the workqueue context */ 106 /* vmallocs are handled from the workqueue context */
107 schedule_work(&fddef->wq); 107 schedule_work(&fddef->wq);
108 spin_unlock(&fddef->lock); 108 spin_unlock(&fddef->lock);
109 put_cpu_var(fdtable_defer_list); 109 put_cpu_var(fdtable_defer_list);
110 } 110 }
111 } 111 }
112 112
113 /* 113 /*
114 * Expand the fdset in the files_struct. Called with the files spinlock 114 * Expand the fdset in the files_struct. Called with the files spinlock
115 * held for write. 115 * held for write.
116 */ 116 */
117 static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt) 117 static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt)
118 { 118 {
119 unsigned int cpy, set; 119 unsigned int cpy, set;
120 120
121 BUG_ON(nfdt->max_fds < ofdt->max_fds); 121 BUG_ON(nfdt->max_fds < ofdt->max_fds);
122 if (ofdt->max_fds == 0) 122 if (ofdt->max_fds == 0)
123 return; 123 return;
124 124
125 cpy = ofdt->max_fds * sizeof(struct file *); 125 cpy = ofdt->max_fds * sizeof(struct file *);
126 set = (nfdt->max_fds - ofdt->max_fds) * sizeof(struct file *); 126 set = (nfdt->max_fds - ofdt->max_fds) * sizeof(struct file *);
127 memcpy(nfdt->fd, ofdt->fd, cpy); 127 memcpy(nfdt->fd, ofdt->fd, cpy);
128 memset((char *)(nfdt->fd) + cpy, 0, set); 128 memset((char *)(nfdt->fd) + cpy, 0, set);
129 129
130 cpy = ofdt->max_fds / BITS_PER_BYTE; 130 cpy = ofdt->max_fds / BITS_PER_BYTE;
131 set = (nfdt->max_fds - ofdt->max_fds) / BITS_PER_BYTE; 131 set = (nfdt->max_fds - ofdt->max_fds) / BITS_PER_BYTE;
132 memcpy(nfdt->open_fds, ofdt->open_fds, cpy); 132 memcpy(nfdt->open_fds, ofdt->open_fds, cpy);
133 memset((char *)(nfdt->open_fds) + cpy, 0, set); 133 memset((char *)(nfdt->open_fds) + cpy, 0, set);
134 memcpy(nfdt->close_on_exec, ofdt->close_on_exec, cpy); 134 memcpy(nfdt->close_on_exec, ofdt->close_on_exec, cpy);
135 memset((char *)(nfdt->close_on_exec) + cpy, 0, set); 135 memset((char *)(nfdt->close_on_exec) + cpy, 0, set);
136 } 136 }
137 137
138 static struct fdtable * alloc_fdtable(unsigned int nr) 138 static struct fdtable * alloc_fdtable(unsigned int nr)
139 { 139 {
140 struct fdtable *fdt; 140 struct fdtable *fdt;
141 char *data; 141 char *data;
142 142
143 /* 143 /*
144 * Figure out how many fds we actually want to support in this fdtable. 144 * Figure out how many fds we actually want to support in this fdtable.
145 * Allocation steps are keyed to the size of the fdarray, since it 145 * Allocation steps are keyed to the size of the fdarray, since it
146 * grows far faster than any of the other dynamic data. We try to fit 146 * grows far faster than any of the other dynamic data. We try to fit
147 * the fdarray into comfortable page-tuned chunks: starting at 1024B 147 * the fdarray into comfortable page-tuned chunks: starting at 1024B
148 * and growing in powers of two from there on. 148 * and growing in powers of two from there on.
149 */ 149 */
150 nr /= (1024 / sizeof(struct file *)); 150 nr /= (1024 / sizeof(struct file *));
151 nr = roundup_pow_of_two(nr + 1); 151 nr = roundup_pow_of_two(nr + 1);
152 nr *= (1024 / sizeof(struct file *)); 152 nr *= (1024 / sizeof(struct file *));
153 /* 153 /*
154 * Note that this can drive nr *below* what we had passed if sysctl_nr_open 154 * Note that this can drive nr *below* what we had passed if sysctl_nr_open
155 * had been set lower between the check in expand_files() and here. Deal 155 * had been set lower between the check in expand_files() and here. Deal
156 * with that in caller, it's cheaper that way. 156 * with that in caller, it's cheaper that way.
157 * 157 *
158 * We make sure that nr remains a multiple of BITS_PER_LONG - otherwise 158 * We make sure that nr remains a multiple of BITS_PER_LONG - otherwise
159 * bitmaps handling below becomes unpleasant, to put it mildly... 159 * bitmaps handling below becomes unpleasant, to put it mildly...
160 */ 160 */
161 if (unlikely(nr > sysctl_nr_open)) 161 if (unlikely(nr > sysctl_nr_open))
162 nr = ((sysctl_nr_open - 1) | (BITS_PER_LONG - 1)) + 1; 162 nr = ((sysctl_nr_open - 1) | (BITS_PER_LONG - 1)) + 1;
163 163
164 fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL); 164 fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL);
165 if (!fdt) 165 if (!fdt)
166 goto out; 166 goto out;
167 fdt->max_fds = nr; 167 fdt->max_fds = nr;
168 data = alloc_fdmem(nr * sizeof(struct file *)); 168 data = alloc_fdmem(nr * sizeof(struct file *));
169 if (!data) 169 if (!data)
170 goto out_fdt; 170 goto out_fdt;
171 fdt->fd = (struct file **)data; 171 fdt->fd = (struct file **)data;
172 data = alloc_fdmem(max_t(unsigned int, 172 data = alloc_fdmem(max_t(unsigned int,
173 2 * nr / BITS_PER_BYTE, L1_CACHE_BYTES)); 173 2 * nr / BITS_PER_BYTE, L1_CACHE_BYTES));
174 if (!data) 174 if (!data)
175 goto out_arr; 175 goto out_arr;
176 fdt->open_fds = (fd_set *)data; 176 fdt->open_fds = (fd_set *)data;
177 data += nr / BITS_PER_BYTE; 177 data += nr / BITS_PER_BYTE;
178 fdt->close_on_exec = (fd_set *)data; 178 fdt->close_on_exec = (fd_set *)data;
179 INIT_RCU_HEAD(&fdt->rcu); 179 INIT_RCU_HEAD(&fdt->rcu);
180 fdt->next = NULL; 180 fdt->next = NULL;
181 181
182 return fdt; 182 return fdt;
183 183
184 out_arr: 184 out_arr:
185 free_fdarr(fdt); 185 free_fdarr(fdt);
186 out_fdt: 186 out_fdt:
187 kfree(fdt); 187 kfree(fdt);
188 out: 188 out:
189 return NULL; 189 return NULL;
190 } 190 }
191 191
192 /* 192 /*
193 * Expand the file descriptor table. 193 * Expand the file descriptor table.
194 * This function will allocate a new fdtable and both fd array and fdset, of 194 * This function will allocate a new fdtable and both fd array and fdset, of
195 * the given size. 195 * the given size.
196 * Return <0 error code on error; 1 on successful completion. 196 * Return <0 error code on error; 1 on successful completion.
197 * The files->file_lock should be held on entry, and will be held on exit. 197 * The files->file_lock should be held on entry, and will be held on exit.
198 */ 198 */
199 static int expand_fdtable(struct files_struct *files, int nr) 199 static int expand_fdtable(struct files_struct *files, int nr)
200 __releases(files->file_lock) 200 __releases(files->file_lock)
201 __acquires(files->file_lock) 201 __acquires(files->file_lock)
202 { 202 {
203 struct fdtable *new_fdt, *cur_fdt; 203 struct fdtable *new_fdt, *cur_fdt;
204 204
205 spin_unlock(&files->file_lock); 205 spin_unlock(&files->file_lock);
206 new_fdt = alloc_fdtable(nr); 206 new_fdt = alloc_fdtable(nr);
207 spin_lock(&files->file_lock); 207 spin_lock(&files->file_lock);
208 if (!new_fdt) 208 if (!new_fdt)
209 return -ENOMEM; 209 return -ENOMEM;
210 /* 210 /*
211 * extremely unlikely race - sysctl_nr_open decreased between the check in 211 * extremely unlikely race - sysctl_nr_open decreased between the check in
212 * caller and alloc_fdtable(). Cheaper to catch it here... 212 * caller and alloc_fdtable(). Cheaper to catch it here...
213 */ 213 */
214 if (unlikely(new_fdt->max_fds <= nr)) { 214 if (unlikely(new_fdt->max_fds <= nr)) {
215 free_fdarr(new_fdt); 215 free_fdarr(new_fdt);
216 free_fdset(new_fdt); 216 free_fdset(new_fdt);
217 kfree(new_fdt); 217 kfree(new_fdt);
218 return -EMFILE; 218 return -EMFILE;
219 } 219 }
220 /* 220 /*
221 * Check again since another task may have expanded the fd table while 221 * Check again since another task may have expanded the fd table while
222 * we dropped the lock 222 * we dropped the lock
223 */ 223 */
224 cur_fdt = files_fdtable(files); 224 cur_fdt = files_fdtable(files);
225 if (nr >= cur_fdt->max_fds) { 225 if (nr >= cur_fdt->max_fds) {
226 /* Continue as planned */ 226 /* Continue as planned */
227 copy_fdtable(new_fdt, cur_fdt); 227 copy_fdtable(new_fdt, cur_fdt);
228 rcu_assign_pointer(files->fdt, new_fdt); 228 rcu_assign_pointer(files->fdt, new_fdt);
229 if (cur_fdt->max_fds > NR_OPEN_DEFAULT) 229 if (cur_fdt->max_fds > NR_OPEN_DEFAULT)
230 free_fdtable(cur_fdt); 230 free_fdtable(cur_fdt);
231 } else { 231 } else {
232 /* Somebody else expanded, so undo our attempt */ 232 /* Somebody else expanded, so undo our attempt */
233 free_fdarr(new_fdt); 233 free_fdarr(new_fdt);
234 free_fdset(new_fdt); 234 free_fdset(new_fdt);
235 kfree(new_fdt); 235 kfree(new_fdt);
236 } 236 }
237 return 1; 237 return 1;
238 } 238 }
239 239
240 /* 240 /*
241 * Expand files. 241 * Expand files.
242 * This function will expand the file structures, if the requested size exceeds 242 * This function will expand the file structures, if the requested size exceeds
243 * the current capacity and there is room for expansion. 243 * the current capacity and there is room for expansion.
244 * Return <0 error code on error; 0 when nothing done; 1 when files were 244 * Return <0 error code on error; 0 when nothing done; 1 when files were
245 * expanded and execution may have blocked. 245 * expanded and execution may have blocked.
246 * The files->file_lock should be held on entry, and will be held on exit. 246 * The files->file_lock should be held on entry, and will be held on exit.
247 */ 247 */
248 int expand_files(struct files_struct *files, int nr) 248 int expand_files(struct files_struct *files, int nr)
249 { 249 {
250 struct fdtable *fdt; 250 struct fdtable *fdt;
251 251
252 fdt = files_fdtable(files); 252 fdt = files_fdtable(files);
253 /* Do we need to expand? */ 253 /* Do we need to expand? */
254 if (nr < fdt->max_fds) 254 if (nr < fdt->max_fds)
255 return 0; 255 return 0;
256 /* Can we expand? */ 256 /* Can we expand? */
257 if (nr >= sysctl_nr_open) 257 if (nr >= sysctl_nr_open)
258 return -EMFILE; 258 return -EMFILE;
259 259
260 /* All good, so we try */ 260 /* All good, so we try */
261 return expand_fdtable(files, nr); 261 return expand_fdtable(files, nr);
262 } 262 }
263 263
264 static int count_open_files(struct fdtable *fdt)
265 {
266 int size = fdt->max_fds;
267 int i;
268
269 /* Find the last open fd */
270 for (i = size/(8*sizeof(long)); i > 0; ) {
271 if (fdt->open_fds->fds_bits[--i])
272 break;
273 }
274 i = (i+1) * 8 * sizeof(long);
275 return i;
276 }
277
278 static struct files_struct *alloc_files(void)
279 {
280 struct files_struct *newf;
281 struct fdtable *fdt;
282
283 newf = kmem_cache_alloc(files_cachep, GFP_KERNEL);
284 if (!newf)
285 goto out;
286
287 atomic_set(&newf->count, 1);
288
289 spin_lock_init(&newf->file_lock);
290 newf->next_fd = 0;
291 fdt = &newf->fdtab;
292 fdt->max_fds = NR_OPEN_DEFAULT;
293 fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init;
294 fdt->open_fds = (fd_set *)&newf->open_fds_init;
295 fdt->fd = &newf->fd_array[0];
296 INIT_RCU_HEAD(&fdt->rcu);
297 fdt->next = NULL;
298 rcu_assign_pointer(newf->fdt, fdt);
299 out:
300 return newf;
301 }
302
303 /*
304 * Allocate a new files structure and copy contents from the
305 * passed in files structure.
306 * errorp will be valid only when the returned files_struct is NULL.
307 */
308 struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
309 {
310 struct files_struct *newf;
311 struct file **old_fds, **new_fds;
312 int open_files, size, i;
313 struct fdtable *old_fdt, *new_fdt;
314
315 *errorp = -ENOMEM;
316 newf = alloc_files();
317 if (!newf)
318 goto out;
319
320 spin_lock(&oldf->file_lock);
321 old_fdt = files_fdtable(oldf);
322 new_fdt = files_fdtable(newf);
323 open_files = count_open_files(old_fdt);
324
325 /*
326 * Check whether we need to allocate a larger fd array and fd set.
327 * Note: we're not a clone task, so the open count won't change.
328 */
329 if (open_files > new_fdt->max_fds) {
330 new_fdt->max_fds = 0;
331 spin_unlock(&oldf->file_lock);
332 spin_lock(&newf->file_lock);
333 *errorp = expand_files(newf, open_files-1);
334 spin_unlock(&newf->file_lock);
335 if (*errorp < 0)
336 goto out_release;
337 new_fdt = files_fdtable(newf);
338 /*
339 * Reacquire the oldf lock and a pointer to its fd table;
340 * it may have a new, bigger fd table by now, so we need
341 * the latest pointer.
342 */
343 spin_lock(&oldf->file_lock);
344 old_fdt = files_fdtable(oldf);
345 }
346
347 old_fds = old_fdt->fd;
348 new_fds = new_fdt->fd;
349
350 memcpy(new_fdt->open_fds->fds_bits,
351 old_fdt->open_fds->fds_bits, open_files/8);
352 memcpy(new_fdt->close_on_exec->fds_bits,
353 old_fdt->close_on_exec->fds_bits, open_files/8);
354
355 for (i = open_files; i != 0; i--) {
356 struct file *f = *old_fds++;
357 if (f) {
358 get_file(f);
359 } else {
360 /*
361 * The fd may be claimed in the fd bitmap but not yet
362 * instantiated in the files array if a sibling thread
363 * is partway through open(). So make sure that this
364 * fd is available to the new process.
365 */
366 FD_CLR(open_files - i, new_fdt->open_fds);
367 }
368 rcu_assign_pointer(*new_fds++, f);
369 }
370 spin_unlock(&oldf->file_lock);
371
372 /* compute the remainder to be cleared */
373 size = (new_fdt->max_fds - open_files) * sizeof(struct file *);
374
375 /* This is long-word aligned, thus could use an optimized version */
376 memset(new_fds, 0, size);
377
378 if (new_fdt->max_fds > open_files) {
379 int left = (new_fdt->max_fds-open_files)/8;
380 int start = open_files / (8 * sizeof(unsigned long));
381
382 memset(&new_fdt->open_fds->fds_bits[start], 0, left);
383 memset(&new_fdt->close_on_exec->fds_bits[start], 0, left);
384 }
385
386 return newf;
387
388 out_release:
389 kmem_cache_free(files_cachep, newf);
390 out:
391 return NULL;
392 }
393
264 static void __devinit fdtable_defer_list_init(int cpu) 394 static void __devinit fdtable_defer_list_init(int cpu)
265 { 395 {
266 struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu); 396 struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu);
267 spin_lock_init(&fddef->lock); 397 spin_lock_init(&fddef->lock);
268 INIT_WORK(&fddef->wq, free_fdtable_work); 398 INIT_WORK(&fddef->wq, free_fdtable_work);
269 fddef->next = NULL; 399 fddef->next = NULL;
270 } 400 }
271 401
272 void __init files_defer_init(void) 402 void __init files_defer_init(void)
273 { 403 {
274 int i; 404 int i;
275 for_each_possible_cpu(i) 405 for_each_possible_cpu(i)
276 fdtable_defer_list_init(i); 406 fdtable_defer_list_init(i);
277 } 407 }
278 408
279 struct files_struct init_files = { 409 struct files_struct init_files = {
280 .count = ATOMIC_INIT(1), 410 .count = ATOMIC_INIT(1),
281 .fdt = &init_files.fdtab, 411 .fdt = &init_files.fdtab,
282 .fdtab = { 412 .fdtab = {
283 .max_fds = NR_OPEN_DEFAULT, 413 .max_fds = NR_OPEN_DEFAULT,
284 .fd = &init_files.fd_array[0], 414 .fd = &init_files.fd_array[0],
285 .close_on_exec = (fd_set *)&init_files.close_on_exec_init, 415 .close_on_exec = (fd_set *)&init_files.close_on_exec_init,
286 .open_fds = (fd_set *)&init_files.open_fds_init, 416 .open_fds = (fd_set *)&init_files.open_fds_init,
287 .rcu = RCU_HEAD_INIT, 417 .rcu = RCU_HEAD_INIT,
288 }, 418 },
289 .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), 419 .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock),
290 }; 420 };
291 421
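For orientation (an editorial note, not part of the commit): dup_fd() is the helper fork() relies on when the child gets a private descriptor table instead of sharing its parent's. Below is a minimal sketch of such a caller, modelled on copy_files() in kernel/fork.c of this era; only the dup_fd() prototype is taken from the patch, while the function name and exact control flow are illustrative assumptions.

/*
 * Sketch of a dup_fd() caller -- names and flow are illustrative,
 * not lifted from this diff.
 */
#include <linux/fdtable.h>
#include <linux/sched.h>

static int copy_files_sketch(unsigned long clone_flags, struct task_struct *tsk)
{
	struct files_struct *oldf = current->files;
	int error = 0;

	if (!oldf)				/* kernel threads may have no files */
		return 0;

	if (clone_flags & CLONE_FILES) {	/* share the parent's table */
		atomic_inc(&oldf->count);
		return 0;
	}

	tsk->files = dup_fd(oldf, &error);	/* private copy for the child */
	if (!tsk->files)
		return error;			/* *errorp is valid only on failure */
	return 0;
}

Note that the error convention matches the comment above dup_fd(): errorp is only meaningful when the returned files_struct is NULL.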
include/linux/fdtable.h
1 /* 1 /*
2 * descriptor table internals; you almost certainly want file.h instead. 2 * descriptor table internals; you almost certainly want file.h instead.
3 */ 3 */
4 4
5 #ifndef __LINUX_FDTABLE_H 5 #ifndef __LINUX_FDTABLE_H
6 #define __LINUX_FDTABLE_H 6 #define __LINUX_FDTABLE_H
7 7
8 #include <asm/atomic.h> 8 #include <asm/atomic.h>
9 #include <linux/posix_types.h> 9 #include <linux/posix_types.h>
10 #include <linux/compiler.h> 10 #include <linux/compiler.h>
11 #include <linux/spinlock.h> 11 #include <linux/spinlock.h>
12 #include <linux/rcupdate.h> 12 #include <linux/rcupdate.h>
13 #include <linux/types.h> 13 #include <linux/types.h>
14 14
15 /* 15 /*
16 * The default fd array needs to be at least BITS_PER_LONG, 16 * The default fd array needs to be at least BITS_PER_LONG,
17 * as this is the granularity returned by copy_fdset(). 17 * as this is the granularity returned by copy_fdset().
18 */ 18 */
19 #define NR_OPEN_DEFAULT BITS_PER_LONG 19 #define NR_OPEN_DEFAULT BITS_PER_LONG
20 20
21 /* 21 /*
22 * The embedded_fd_set is a small fd_set, 22 * The embedded_fd_set is a small fd_set,
23 * suitable for most tasks (which open <= BITS_PER_LONG files) 23 * suitable for most tasks (which open <= BITS_PER_LONG files)
24 */ 24 */
25 struct embedded_fd_set { 25 struct embedded_fd_set {
26 unsigned long fds_bits[1]; 26 unsigned long fds_bits[1];
27 }; 27 };
28 28
29 struct fdtable { 29 struct fdtable {
30 unsigned int max_fds; 30 unsigned int max_fds;
31 struct file ** fd; /* current fd array */ 31 struct file ** fd; /* current fd array */
32 fd_set *close_on_exec; 32 fd_set *close_on_exec;
33 fd_set *open_fds; 33 fd_set *open_fds;
34 struct rcu_head rcu; 34 struct rcu_head rcu;
35 struct fdtable *next; 35 struct fdtable *next;
36 }; 36 };
37 37
38 /* 38 /*
39 * Open file table structure 39 * Open file table structure
40 */ 40 */
41 struct files_struct { 41 struct files_struct {
42 /* 42 /*
43 * read mostly part 43 * read mostly part
44 */ 44 */
45 atomic_t count; 45 atomic_t count;
46 struct fdtable *fdt; 46 struct fdtable *fdt;
47 struct fdtable fdtab; 47 struct fdtable fdtab;
48 /* 48 /*
49 * written part on a separate cache line in SMP 49 * written part on a separate cache line in SMP
50 */ 50 */
51 spinlock_t file_lock ____cacheline_aligned_in_smp; 51 spinlock_t file_lock ____cacheline_aligned_in_smp;
52 int next_fd; 52 int next_fd;
53 struct embedded_fd_set close_on_exec_init; 53 struct embedded_fd_set close_on_exec_init;
54 struct embedded_fd_set open_fds_init; 54 struct embedded_fd_set open_fds_init;
55 struct file * fd_array[NR_OPEN_DEFAULT]; 55 struct file * fd_array[NR_OPEN_DEFAULT];
56 }; 56 };
57 57
58 #define files_fdtable(files) (rcu_dereference((files)->fdt)) 58 #define files_fdtable(files) (rcu_dereference((files)->fdt))
59 59
60 extern struct kmem_cache *filp_cachep; 60 extern struct kmem_cache *filp_cachep;
61 61
62 struct file_operations; 62 struct file_operations;
63 struct vfsmount; 63 struct vfsmount;
64 struct dentry; 64 struct dentry;
65 65
66 extern int expand_files(struct files_struct *, int nr); 66 extern int expand_files(struct files_struct *, int nr);
67 extern void free_fdtable_rcu(struct rcu_head *rcu); 67 extern void free_fdtable_rcu(struct rcu_head *rcu);
68 extern void __init files_defer_init(void); 68 extern void __init files_defer_init(void);
69 69
70 static inline void free_fdtable(struct fdtable *fdt) 70 static inline void free_fdtable(struct fdtable *fdt)
71 { 71 {
72 call_rcu(&fdt->rcu, free_fdtable_rcu); 72 call_rcu(&fdt->rcu, free_fdtable_rcu);
73 } 73 }
74 74
75 static inline struct file * fcheck_files(struct files_struct *files, unsigned int fd) 75 static inline struct file * fcheck_files(struct files_struct *files, unsigned int fd)
76 { 76 {
77 struct file * file = NULL; 77 struct file * file = NULL;
78 struct fdtable *fdt = files_fdtable(files); 78 struct fdtable *fdt = files_fdtable(files);
79 79
80 if (fd < fdt->max_fds) 80 if (fd < fdt->max_fds)
81 file = rcu_dereference(fdt->fd[fd]); 81 file = rcu_dereference(fdt->fd[fd]);
82 return file; 82 return file;
83 } 83 }
84 84
85 /* 85 /*
86 * Check whether the specified fd has an open file. 86 * Check whether the specified fd has an open file.
87 */ 87 */
88 #define fcheck(fd) fcheck_files(current->files, fd) 88 #define fcheck(fd) fcheck_files(current->files, fd)
89 89
90 struct task_struct; 90 struct task_struct;
91 91
92 struct files_struct *get_files_struct(struct task_struct *); 92 struct files_struct *get_files_struct(struct task_struct *);
93 void put_files_struct(struct files_struct *fs); 93 void put_files_struct(struct files_struct *fs);
94 void reset_files_struct(struct files_struct *); 94 void reset_files_struct(struct files_struct *);
95 int unshare_files(struct files_struct **); 95 int unshare_files(struct files_struct **);
96 struct files_struct *dup_fd(struct files_struct *, int *);
96 97
97 extern struct kmem_cache *files_cachep; 98 extern struct kmem_cache *files_cachep;
98 99
99 #endif /* __LINUX_FDTABLE_H */ 100 #endif /* __LINUX_FDTABLE_H */
100 101
kernel/fork.c
1 /* 1 /*
2 * linux/kernel/fork.c 2 * linux/kernel/fork.c
3 * 3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds 4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */ 5 */
6 6
7 /* 7 /*
8 * 'fork.c' contains the help-routines for the 'fork' system call 8 * 'fork.c' contains the help-routines for the 'fork' system call
9 * (see also entry.S and others). 9 * (see also entry.S and others).
10 * Fork is rather simple, once you get the hang of it, but the memory 10 * Fork is rather simple, once you get the hang of it, but the memory
11 * management can be a bitch. See 'mm/memory.c': 'copy_page_range()' 11 * management can be a bitch. See 'mm/memory.c': 'copy_page_range()'
12 */ 12 */
13 13
14 #include <linux/slab.h> 14 #include <linux/slab.h>
15 #include <linux/init.h> 15 #include <linux/init.h>
16 #include <linux/unistd.h> 16 #include <linux/unistd.h>
17 #include <linux/module.h> 17 #include <linux/module.h>
18 #include <linux/vmalloc.h> 18 #include <linux/vmalloc.h>
19 #include <linux/completion.h> 19 #include <linux/completion.h>
20 #include <linux/mnt_namespace.h> 20 #include <linux/mnt_namespace.h>
21 #include <linux/personality.h> 21 #include <linux/personality.h>
22 #include <linux/mempolicy.h> 22 #include <linux/mempolicy.h>
23 #include <linux/sem.h> 23 #include <linux/sem.h>
24 #include <linux/file.h> 24 #include <linux/file.h>
25 #include <linux/fdtable.h> 25 #include <linux/fdtable.h>
26 #include <linux/key.h> 26 #include <linux/key.h>
27 #include <linux/binfmts.h> 27 #include <linux/binfmts.h>
28 #include <linux/mman.h> 28 #include <linux/mman.h>
29 #include <linux/fs.h> 29 #include <linux/fs.h>
30 #include <linux/nsproxy.h> 30 #include <linux/nsproxy.h>
31 #include <linux/capability.h> 31 #include <linux/capability.h>
32 #include <linux/cpu.h> 32 #include <linux/cpu.h>
33 #include <linux/cgroup.h> 33 #include <linux/cgroup.h>
34 #include <linux/security.h> 34 #include <linux/security.h>
35 #include <linux/swap.h> 35 #include <linux/swap.h>
36 #include <linux/syscalls.h> 36 #include <linux/syscalls.h>
37 #include <linux/jiffies.h> 37 #include <linux/jiffies.h>
38 #include <linux/futex.h> 38 #include <linux/futex.h>
39 #include <linux/task_io_accounting_ops.h> 39 #include <linux/task_io_accounting_ops.h>
40 #include <linux/rcupdate.h> 40 #include <linux/rcupdate.h>
41 #include <linux/ptrace.h> 41 #include <linux/ptrace.h>
42 #include <linux/mount.h> 42 #include <linux/mount.h>
43 #include <linux/audit.h> 43 #include <linux/audit.h>
44 #include <linux/memcontrol.h> 44 #include <linux/memcontrol.h>
45 #include <linux/profile.h> 45 #include <linux/profile.h>
46 #include <linux/rmap.h> 46 #include <linux/rmap.h>
47 #include <linux/acct.h> 47 #include <linux/acct.h>
48 #include <linux/tsacct_kern.h> 48 #include <linux/tsacct_kern.h>
49 #include <linux/cn_proc.h> 49 #include <linux/cn_proc.h>
50 #include <linux/freezer.h> 50 #include <linux/freezer.h>
51 #include <linux/delayacct.h> 51 #include <linux/delayacct.h>
52 #include <linux/taskstats_kern.h> 52 #include <linux/taskstats_kern.h>
53 #include <linux/random.h> 53 #include <linux/random.h>
54 #include <linux/tty.h> 54 #include <linux/tty.h>
55 #include <linux/proc_fs.h> 55 #include <linux/proc_fs.h>
56 #include <linux/blkdev.h> 56 #include <linux/blkdev.h>
57 57
58 #include <asm/pgtable.h> 58 #include <asm/pgtable.h>
59 #include <asm/pgalloc.h> 59 #include <asm/pgalloc.h>
60 #include <asm/uaccess.h> 60 #include <asm/uaccess.h>
61 #include <asm/mmu_context.h> 61 #include <asm/mmu_context.h>
62 #include <asm/cacheflush.h> 62 #include <asm/cacheflush.h>
63 #include <asm/tlbflush.h> 63 #include <asm/tlbflush.h>
64 64
65 /* 65 /*
66 * Protected counters by write_lock_irq(&tasklist_lock) 66 * Protected counters by write_lock_irq(&tasklist_lock)
67 */ 67 */
68 unsigned long total_forks; /* Handle normal Linux uptimes. */ 68 unsigned long total_forks; /* Handle normal Linux uptimes. */
69 int nr_threads; /* The idle threads do not count.. */ 69 int nr_threads; /* The idle threads do not count.. */
70 70
71 int max_threads; /* tunable limit on nr_threads */ 71 int max_threads; /* tunable limit on nr_threads */
72 72
73 DEFINE_PER_CPU(unsigned long, process_counts) = 0; 73 DEFINE_PER_CPU(unsigned long, process_counts) = 0;
74 74
75 __cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ 75 __cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */
76 76
77 int nr_processes(void) 77 int nr_processes(void)
78 { 78 {
79 int cpu; 79 int cpu;
80 int total = 0; 80 int total = 0;
81 81
82 for_each_online_cpu(cpu) 82 for_each_online_cpu(cpu)
83 total += per_cpu(process_counts, cpu); 83 total += per_cpu(process_counts, cpu);
84 84
85 return total; 85 return total;
86 } 86 }
87 87
88 #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR 88 #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
89 # define alloc_task_struct() kmem_cache_alloc(task_struct_cachep, GFP_KERNEL) 89 # define alloc_task_struct() kmem_cache_alloc(task_struct_cachep, GFP_KERNEL)
90 # define free_task_struct(tsk) kmem_cache_free(task_struct_cachep, (tsk)) 90 # define free_task_struct(tsk) kmem_cache_free(task_struct_cachep, (tsk))
91 static struct kmem_cache *task_struct_cachep; 91 static struct kmem_cache *task_struct_cachep;
92 #endif 92 #endif
93 93
94 /* SLAB cache for signal_struct structures (tsk->signal) */ 94 /* SLAB cache for signal_struct structures (tsk->signal) */
95 static struct kmem_cache *signal_cachep; 95 static struct kmem_cache *signal_cachep;
96 96
97 /* SLAB cache for sighand_struct structures (tsk->sighand) */ 97 /* SLAB cache for sighand_struct structures (tsk->sighand) */
98 struct kmem_cache *sighand_cachep; 98 struct kmem_cache *sighand_cachep;
99 99
100 /* SLAB cache for files_struct structures (tsk->files) */ 100 /* SLAB cache for files_struct structures (tsk->files) */
101 struct kmem_cache *files_cachep; 101 struct kmem_cache *files_cachep;
102 102
103 /* SLAB cache for fs_struct structures (tsk->fs) */ 103 /* SLAB cache for fs_struct structures (tsk->fs) */
104 struct kmem_cache *fs_cachep; 104 struct kmem_cache *fs_cachep;
105 105
106 /* SLAB cache for vm_area_struct structures */ 106 /* SLAB cache for vm_area_struct structures */
107 struct kmem_cache *vm_area_cachep; 107 struct kmem_cache *vm_area_cachep;
108 108
109 /* SLAB cache for mm_struct structures (tsk->mm) */ 109 /* SLAB cache for mm_struct structures (tsk->mm) */
110 static struct kmem_cache *mm_cachep; 110 static struct kmem_cache *mm_cachep;
111 111
112 void free_task(struct task_struct *tsk) 112 void free_task(struct task_struct *tsk)
113 { 113 {
114 prop_local_destroy_single(&tsk->dirties); 114 prop_local_destroy_single(&tsk->dirties);
115 free_thread_info(tsk->stack); 115 free_thread_info(tsk->stack);
116 rt_mutex_debug_task_free(tsk); 116 rt_mutex_debug_task_free(tsk);
117 free_task_struct(tsk); 117 free_task_struct(tsk);
118 } 118 }
119 EXPORT_SYMBOL(free_task); 119 EXPORT_SYMBOL(free_task);
120 120
121 void __put_task_struct(struct task_struct *tsk) 121 void __put_task_struct(struct task_struct *tsk)
122 { 122 {
123 WARN_ON(!tsk->exit_state); 123 WARN_ON(!tsk->exit_state);
124 WARN_ON(atomic_read(&tsk->usage)); 124 WARN_ON(atomic_read(&tsk->usage));
125 WARN_ON(tsk == current); 125 WARN_ON(tsk == current);
126 126
127 security_task_free(tsk); 127 security_task_free(tsk);
128 free_uid(tsk->user); 128 free_uid(tsk->user);
129 put_group_info(tsk->group_info); 129 put_group_info(tsk->group_info);
130 delayacct_tsk_free(tsk); 130 delayacct_tsk_free(tsk);
131 131
132 if (!profile_handoff_task(tsk)) 132 if (!profile_handoff_task(tsk))
133 free_task(tsk); 133 free_task(tsk);
134 } 134 }
135 135
136 /* 136 /*
137 * macro override instead of weak attribute alias, to workaround 137 * macro override instead of weak attribute alias, to workaround
138 * gcc 4.1.0 and 4.1.1 bugs with weak attribute and empty functions. 138 * gcc 4.1.0 and 4.1.1 bugs with weak attribute and empty functions.
139 */ 139 */
140 #ifndef arch_task_cache_init 140 #ifndef arch_task_cache_init
141 #define arch_task_cache_init() 141 #define arch_task_cache_init()
142 #endif 142 #endif
143 143
144 void __init fork_init(unsigned long mempages) 144 void __init fork_init(unsigned long mempages)
145 { 145 {
146 #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR 146 #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
147 #ifndef ARCH_MIN_TASKALIGN 147 #ifndef ARCH_MIN_TASKALIGN
148 #define ARCH_MIN_TASKALIGN L1_CACHE_BYTES 148 #define ARCH_MIN_TASKALIGN L1_CACHE_BYTES
149 #endif 149 #endif
150 /* create a slab on which task_structs can be allocated */ 150 /* create a slab on which task_structs can be allocated */
151 task_struct_cachep = 151 task_struct_cachep =
152 kmem_cache_create("task_struct", sizeof(struct task_struct), 152 kmem_cache_create("task_struct", sizeof(struct task_struct),
153 ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL); 153 ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL);
154 #endif 154 #endif
155 155
156 /* do the arch specific task caches init */ 156 /* do the arch specific task caches init */
157 arch_task_cache_init(); 157 arch_task_cache_init();
158 158
159 /* 159 /*
160 * The default maximum number of threads is set to a safe 160 * The default maximum number of threads is set to a safe
161 * value: the thread structures can take up at most half 161 * value: the thread structures can take up at most half
162 * of memory. 162 * of memory.
163 */ 163 */
164 max_threads = mempages / (8 * THREAD_SIZE / PAGE_SIZE); 164 max_threads = mempages / (8 * THREAD_SIZE / PAGE_SIZE);
165 165
166 /* 166 /*
167 * we need to allow at least 20 threads to boot a system 167 * we need to allow at least 20 threads to boot a system
168 */ 168 */
169 if(max_threads < 20) 169 if(max_threads < 20)
170 max_threads = 20; 170 max_threads = 20;
171 171
172 init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2; 172 init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
173 init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2; 173 init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
174 init_task.signal->rlim[RLIMIT_SIGPENDING] = 174 init_task.signal->rlim[RLIMIT_SIGPENDING] =
175 init_task.signal->rlim[RLIMIT_NPROC]; 175 init_task.signal->rlim[RLIMIT_NPROC];
176 } 176 }
177 177
178 int __attribute__((weak)) arch_dup_task_struct(struct task_struct *dst, 178 int __attribute__((weak)) arch_dup_task_struct(struct task_struct *dst,
179 struct task_struct *src) 179 struct task_struct *src)
180 { 180 {
181 *dst = *src; 181 *dst = *src;
182 return 0; 182 return 0;
183 } 183 }
184 184
185 static struct task_struct *dup_task_struct(struct task_struct *orig) 185 static struct task_struct *dup_task_struct(struct task_struct *orig)
186 { 186 {
187 struct task_struct *tsk; 187 struct task_struct *tsk;
188 struct thread_info *ti; 188 struct thread_info *ti;
189 int err; 189 int err;
190 190
191 prepare_to_copy(orig); 191 prepare_to_copy(orig);
192 192
193 tsk = alloc_task_struct(); 193 tsk = alloc_task_struct();
194 if (!tsk) 194 if (!tsk)
195 return NULL; 195 return NULL;
196 196
197 ti = alloc_thread_info(tsk); 197 ti = alloc_thread_info(tsk);
198 if (!ti) { 198 if (!ti) {
199 free_task_struct(tsk); 199 free_task_struct(tsk);
200 return NULL; 200 return NULL;
201 } 201 }
202 202
203 err = arch_dup_task_struct(tsk, orig); 203 err = arch_dup_task_struct(tsk, orig);
204 if (err) 204 if (err)
205 goto out; 205 goto out;
206 206
207 tsk->stack = ti; 207 tsk->stack = ti;
208 208
209 err = prop_local_init_single(&tsk->dirties); 209 err = prop_local_init_single(&tsk->dirties);
210 if (err) 210 if (err)
211 goto out; 211 goto out;
212 212
213 setup_thread_stack(tsk, orig); 213 setup_thread_stack(tsk, orig);
214 214
215 #ifdef CONFIG_CC_STACKPROTECTOR 215 #ifdef CONFIG_CC_STACKPROTECTOR
216 tsk->stack_canary = get_random_int(); 216 tsk->stack_canary = get_random_int();
217 #endif 217 #endif
218 218
219 /* One for us, one for whoever does the "release_task()" (usually parent) */ 219 /* One for us, one for whoever does the "release_task()" (usually parent) */
220 atomic_set(&tsk->usage,2); 220 atomic_set(&tsk->usage,2);
221 atomic_set(&tsk->fs_excl, 0); 221 atomic_set(&tsk->fs_excl, 0);
222 #ifdef CONFIG_BLK_DEV_IO_TRACE 222 #ifdef CONFIG_BLK_DEV_IO_TRACE
223 tsk->btrace_seq = 0; 223 tsk->btrace_seq = 0;
224 #endif 224 #endif
225 tsk->splice_pipe = NULL; 225 tsk->splice_pipe = NULL;
226 return tsk; 226 return tsk;
227 227
228 out: 228 out:
229 free_thread_info(ti); 229 free_thread_info(ti);
230 free_task_struct(tsk); 230 free_task_struct(tsk);
231 return NULL; 231 return NULL;
232 } 232 }
233 233
234 #ifdef CONFIG_MMU 234 #ifdef CONFIG_MMU
235 static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) 235 static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
236 { 236 {
237 struct vm_area_struct *mpnt, *tmp, **pprev; 237 struct vm_area_struct *mpnt, *tmp, **pprev;
238 struct rb_node **rb_link, *rb_parent; 238 struct rb_node **rb_link, *rb_parent;
239 int retval; 239 int retval;
240 unsigned long charge; 240 unsigned long charge;
241 struct mempolicy *pol; 241 struct mempolicy *pol;
242 242
243 down_write(&oldmm->mmap_sem); 243 down_write(&oldmm->mmap_sem);
244 flush_cache_dup_mm(oldmm); 244 flush_cache_dup_mm(oldmm);
245 /* 245 /*
246 * Not linked in yet - no deadlock potential: 246 * Not linked in yet - no deadlock potential:
247 */ 247 */
248 down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING); 248 down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);
249 249
250 mm->locked_vm = 0; 250 mm->locked_vm = 0;
251 mm->mmap = NULL; 251 mm->mmap = NULL;
252 mm->mmap_cache = NULL; 252 mm->mmap_cache = NULL;
253 mm->free_area_cache = oldmm->mmap_base; 253 mm->free_area_cache = oldmm->mmap_base;
254 mm->cached_hole_size = ~0UL; 254 mm->cached_hole_size = ~0UL;
255 mm->map_count = 0; 255 mm->map_count = 0;
256 cpus_clear(mm->cpu_vm_mask); 256 cpus_clear(mm->cpu_vm_mask);
257 mm->mm_rb = RB_ROOT; 257 mm->mm_rb = RB_ROOT;
258 rb_link = &mm->mm_rb.rb_node; 258 rb_link = &mm->mm_rb.rb_node;
259 rb_parent = NULL; 259 rb_parent = NULL;
260 pprev = &mm->mmap; 260 pprev = &mm->mmap;
261 261
262 for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) { 262 for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
263 struct file *file; 263 struct file *file;
264 264
265 if (mpnt->vm_flags & VM_DONTCOPY) { 265 if (mpnt->vm_flags & VM_DONTCOPY) {
266 long pages = vma_pages(mpnt); 266 long pages = vma_pages(mpnt);
267 mm->total_vm -= pages; 267 mm->total_vm -= pages;
268 vm_stat_account(mm, mpnt->vm_flags, mpnt->vm_file, 268 vm_stat_account(mm, mpnt->vm_flags, mpnt->vm_file,
269 -pages); 269 -pages);
270 continue; 270 continue;
271 } 271 }
272 charge = 0; 272 charge = 0;
273 if (mpnt->vm_flags & VM_ACCOUNT) { 273 if (mpnt->vm_flags & VM_ACCOUNT) {
274 unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; 274 unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
275 if (security_vm_enough_memory(len)) 275 if (security_vm_enough_memory(len))
276 goto fail_nomem; 276 goto fail_nomem;
277 charge = len; 277 charge = len;
278 } 278 }
279 tmp = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); 279 tmp = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
280 if (!tmp) 280 if (!tmp)
281 goto fail_nomem; 281 goto fail_nomem;
282 *tmp = *mpnt; 282 *tmp = *mpnt;
283 pol = mpol_dup(vma_policy(mpnt)); 283 pol = mpol_dup(vma_policy(mpnt));
284 retval = PTR_ERR(pol); 284 retval = PTR_ERR(pol);
285 if (IS_ERR(pol)) 285 if (IS_ERR(pol))
286 goto fail_nomem_policy; 286 goto fail_nomem_policy;
287 vma_set_policy(tmp, pol); 287 vma_set_policy(tmp, pol);
288 tmp->vm_flags &= ~VM_LOCKED; 288 tmp->vm_flags &= ~VM_LOCKED;
289 tmp->vm_mm = mm; 289 tmp->vm_mm = mm;
290 tmp->vm_next = NULL; 290 tmp->vm_next = NULL;
291 anon_vma_link(tmp); 291 anon_vma_link(tmp);
292 file = tmp->vm_file; 292 file = tmp->vm_file;
293 if (file) { 293 if (file) {
294 struct inode *inode = file->f_path.dentry->d_inode; 294 struct inode *inode = file->f_path.dentry->d_inode;
295 get_file(file); 295 get_file(file);
296 if (tmp->vm_flags & VM_DENYWRITE) 296 if (tmp->vm_flags & VM_DENYWRITE)
297 atomic_dec(&inode->i_writecount); 297 atomic_dec(&inode->i_writecount);
298 298
299 /* insert tmp into the share list, just after mpnt */ 299 /* insert tmp into the share list, just after mpnt */
300 spin_lock(&file->f_mapping->i_mmap_lock); 300 spin_lock(&file->f_mapping->i_mmap_lock);
301 tmp->vm_truncate_count = mpnt->vm_truncate_count; 301 tmp->vm_truncate_count = mpnt->vm_truncate_count;
302 flush_dcache_mmap_lock(file->f_mapping); 302 flush_dcache_mmap_lock(file->f_mapping);
303 vma_prio_tree_add(tmp, mpnt); 303 vma_prio_tree_add(tmp, mpnt);
304 flush_dcache_mmap_unlock(file->f_mapping); 304 flush_dcache_mmap_unlock(file->f_mapping);
305 spin_unlock(&file->f_mapping->i_mmap_lock); 305 spin_unlock(&file->f_mapping->i_mmap_lock);
306 } 306 }
307 307
308 /* 308 /*
309 * Link in the new vma and copy the page table entries. 309 * Link in the new vma and copy the page table entries.
310 */ 310 */
311 *pprev = tmp; 311 *pprev = tmp;
312 pprev = &tmp->vm_next; 312 pprev = &tmp->vm_next;
313 313
314 __vma_link_rb(mm, tmp, rb_link, rb_parent); 314 __vma_link_rb(mm, tmp, rb_link, rb_parent);
315 rb_link = &tmp->vm_rb.rb_right; 315 rb_link = &tmp->vm_rb.rb_right;
316 rb_parent = &tmp->vm_rb; 316 rb_parent = &tmp->vm_rb;
317 317
318 mm->map_count++; 318 mm->map_count++;
319 retval = copy_page_range(mm, oldmm, mpnt); 319 retval = copy_page_range(mm, oldmm, mpnt);
320 320
321 if (tmp->vm_ops && tmp->vm_ops->open) 321 if (tmp->vm_ops && tmp->vm_ops->open)
322 tmp->vm_ops->open(tmp); 322 tmp->vm_ops->open(tmp);
323 323
324 if (retval) 324 if (retval)
325 goto out; 325 goto out;
326 } 326 }
327 /* a new mm has just been created */ 327 /* a new mm has just been created */
328 arch_dup_mmap(oldmm, mm); 328 arch_dup_mmap(oldmm, mm);
329 retval = 0; 329 retval = 0;
330 out: 330 out:
331 up_write(&mm->mmap_sem); 331 up_write(&mm->mmap_sem);
332 flush_tlb_mm(oldmm); 332 flush_tlb_mm(oldmm);
333 up_write(&oldmm->mmap_sem); 333 up_write(&oldmm->mmap_sem);
334 return retval; 334 return retval;
335 fail_nomem_policy: 335 fail_nomem_policy:
336 kmem_cache_free(vm_area_cachep, tmp); 336 kmem_cache_free(vm_area_cachep, tmp);
337 fail_nomem: 337 fail_nomem:
338 retval = -ENOMEM; 338 retval = -ENOMEM;
339 vm_unacct_memory(charge); 339 vm_unacct_memory(charge);
340 goto out; 340 goto out;
341 } 341 }
342 342
343 static inline int mm_alloc_pgd(struct mm_struct * mm) 343 static inline int mm_alloc_pgd(struct mm_struct * mm)
344 { 344 {
345 mm->pgd = pgd_alloc(mm); 345 mm->pgd = pgd_alloc(mm);
346 if (unlikely(!mm->pgd)) 346 if (unlikely(!mm->pgd))
347 return -ENOMEM; 347 return -ENOMEM;
348 return 0; 348 return 0;
349 } 349 }
350 350
351 static inline void mm_free_pgd(struct mm_struct * mm) 351 static inline void mm_free_pgd(struct mm_struct * mm)
352 { 352 {
353 pgd_free(mm, mm->pgd); 353 pgd_free(mm, mm->pgd);
354 } 354 }
355 #else 355 #else
356 #define dup_mmap(mm, oldmm) (0) 356 #define dup_mmap(mm, oldmm) (0)
357 #define mm_alloc_pgd(mm) (0) 357 #define mm_alloc_pgd(mm) (0)
358 #define mm_free_pgd(mm) 358 #define mm_free_pgd(mm)
359 #endif /* CONFIG_MMU */ 359 #endif /* CONFIG_MMU */
360 360
361 __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); 361 __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock);
362 362
363 #define allocate_mm() (kmem_cache_alloc(mm_cachep, GFP_KERNEL)) 363 #define allocate_mm() (kmem_cache_alloc(mm_cachep, GFP_KERNEL))
364 #define free_mm(mm) (kmem_cache_free(mm_cachep, (mm))) 364 #define free_mm(mm) (kmem_cache_free(mm_cachep, (mm)))
365 365
366 #include <linux/init_task.h> 366 #include <linux/init_task.h>
367 367
368 static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) 368 static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
369 { 369 {
370 atomic_set(&mm->mm_users, 1); 370 atomic_set(&mm->mm_users, 1);
371 atomic_set(&mm->mm_count, 1); 371 atomic_set(&mm->mm_count, 1);
372 init_rwsem(&mm->mmap_sem); 372 init_rwsem(&mm->mmap_sem);
373 INIT_LIST_HEAD(&mm->mmlist); 373 INIT_LIST_HEAD(&mm->mmlist);
374 mm->flags = (current->mm) ? current->mm->flags 374 mm->flags = (current->mm) ? current->mm->flags
375 : MMF_DUMP_FILTER_DEFAULT; 375 : MMF_DUMP_FILTER_DEFAULT;
376 mm->core_waiters = 0; 376 mm->core_waiters = 0;
377 mm->nr_ptes = 0; 377 mm->nr_ptes = 0;
378 set_mm_counter(mm, file_rss, 0); 378 set_mm_counter(mm, file_rss, 0);
379 set_mm_counter(mm, anon_rss, 0); 379 set_mm_counter(mm, anon_rss, 0);
380 spin_lock_init(&mm->page_table_lock); 380 spin_lock_init(&mm->page_table_lock);
381 rwlock_init(&mm->ioctx_list_lock); 381 rwlock_init(&mm->ioctx_list_lock);
382 mm->ioctx_list = NULL; 382 mm->ioctx_list = NULL;
383 mm->free_area_cache = TASK_UNMAPPED_BASE; 383 mm->free_area_cache = TASK_UNMAPPED_BASE;
384 mm->cached_hole_size = ~0UL; 384 mm->cached_hole_size = ~0UL;
385 mm_init_owner(mm, p); 385 mm_init_owner(mm, p);
386 386
387 if (likely(!mm_alloc_pgd(mm))) { 387 if (likely(!mm_alloc_pgd(mm))) {
388 mm->def_flags = 0; 388 mm->def_flags = 0;
389 return mm; 389 return mm;
390 } 390 }
391 391
392 free_mm(mm); 392 free_mm(mm);
393 return NULL; 393 return NULL;
394 } 394 }
395 395
396 /* 396 /*
397 * Allocate and initialize an mm_struct. 397 * Allocate and initialize an mm_struct.
398 */ 398 */
399 struct mm_struct * mm_alloc(void) 399 struct mm_struct * mm_alloc(void)
400 { 400 {
401 struct mm_struct * mm; 401 struct mm_struct * mm;
402 402
403 mm = allocate_mm(); 403 mm = allocate_mm();
404 if (mm) { 404 if (mm) {
405 memset(mm, 0, sizeof(*mm)); 405 memset(mm, 0, sizeof(*mm));
406 mm = mm_init(mm, current); 406 mm = mm_init(mm, current);
407 } 407 }
408 return mm; 408 return mm;
409 } 409 }
410 410
411 /* 411 /*
412 * Called when the last reference to the mm 412 * Called when the last reference to the mm
413 * is dropped: either by a lazy thread or by 413 * is dropped: either by a lazy thread or by
414 * mmput. Free the page directory and the mm. 414 * mmput. Free the page directory and the mm.
415 */ 415 */
416 void __mmdrop(struct mm_struct *mm) 416 void __mmdrop(struct mm_struct *mm)
417 { 417 {
418 BUG_ON(mm == &init_mm); 418 BUG_ON(mm == &init_mm);
419 mm_free_pgd(mm); 419 mm_free_pgd(mm);
420 destroy_context(mm); 420 destroy_context(mm);
421 free_mm(mm); 421 free_mm(mm);
422 } 422 }
423 EXPORT_SYMBOL_GPL(__mmdrop); 423 EXPORT_SYMBOL_GPL(__mmdrop);
424 424
425 /* 425 /*
426 * Decrement the use count and release all resources for an mm. 426 * Decrement the use count and release all resources for an mm.
427 */ 427 */
428 void mmput(struct mm_struct *mm) 428 void mmput(struct mm_struct *mm)
429 { 429 {
430 might_sleep(); 430 might_sleep();
431 431
432 if (atomic_dec_and_test(&mm->mm_users)) { 432 if (atomic_dec_and_test(&mm->mm_users)) {
433 exit_aio(mm); 433 exit_aio(mm);
434 exit_mmap(mm); 434 exit_mmap(mm);
435 set_mm_exe_file(mm, NULL); 435 set_mm_exe_file(mm, NULL);
436 if (!list_empty(&mm->mmlist)) { 436 if (!list_empty(&mm->mmlist)) {
437 spin_lock(&mmlist_lock); 437 spin_lock(&mmlist_lock);
438 list_del(&mm->mmlist); 438 list_del(&mm->mmlist);
439 spin_unlock(&mmlist_lock); 439 spin_unlock(&mmlist_lock);
440 } 440 }
441 put_swap_token(mm); 441 put_swap_token(mm);
442 mmdrop(mm); 442 mmdrop(mm);
443 } 443 }
444 } 444 }
445 EXPORT_SYMBOL_GPL(mmput); 445 EXPORT_SYMBOL_GPL(mmput);
446 446
447 /** 447 /**
448 * get_task_mm - acquire a reference to the task's mm 448 * get_task_mm - acquire a reference to the task's mm
449 * 449 *
450 * Returns %NULL if the task has no mm. Checks PF_BORROWED_MM (meaning 450 * Returns %NULL if the task has no mm. Checks PF_BORROWED_MM (meaning
451 * this kernel workthread has transiently adopted a user mm with use_mm, 451 * this kernel workthread has transiently adopted a user mm with use_mm,
452 * to do its AIO) is not set and if so returns a reference to it, after 452 * to do its AIO) is not set and if so returns a reference to it, after
453 * bumping up the use count. User must release the mm via mmput() 453 * bumping up the use count. User must release the mm via mmput()
454 * after use. Typically used by /proc and ptrace. 454 * after use. Typically used by /proc and ptrace.
455 */ 455 */
456 struct mm_struct *get_task_mm(struct task_struct *task) 456 struct mm_struct *get_task_mm(struct task_struct *task)
457 { 457 {
458 struct mm_struct *mm; 458 struct mm_struct *mm;
459 459
460 task_lock(task); 460 task_lock(task);
461 mm = task->mm; 461 mm = task->mm;
462 if (mm) { 462 if (mm) {
463 if (task->flags & PF_BORROWED_MM) 463 if (task->flags & PF_BORROWED_MM)
464 mm = NULL; 464 mm = NULL;
465 else 465 else
466 atomic_inc(&mm->mm_users); 466 atomic_inc(&mm->mm_users);
467 } 467 }
468 task_unlock(task); 468 task_unlock(task);
469 return mm; 469 return mm;
470 } 470 }
471 EXPORT_SYMBOL_GPL(get_task_mm); 471 EXPORT_SYMBOL_GPL(get_task_mm);
472 472
473 /* Please note the differences between mmput and mm_release. 473 /* Please note the differences between mmput and mm_release.
474 * mmput is called whenever we stop holding onto a mm_struct, 474 * mmput is called whenever we stop holding onto a mm_struct,
475 * error success whatever. 475 * error success whatever.
476 * 476 *
477 * mm_release is called after a mm_struct has been removed 477 * mm_release is called after a mm_struct has been removed
478 * from the current process. 478 * from the current process.
479 * 479 *
480 * This difference is important for error handling, when we 480 * This difference is important for error handling, when we
481 * only half set up a mm_struct for a new process and need to restore 481 * only half set up a mm_struct for a new process and need to restore
482 * the old one. Because we mmput the new mm_struct before 482 * the old one. Because we mmput the new mm_struct before
483 * restoring the old one. . . 483 * restoring the old one. . .
484 * Eric Biederman 10 January 1998 484 * Eric Biederman 10 January 1998
485 */ 485 */
486 void mm_release(struct task_struct *tsk, struct mm_struct *mm) 486 void mm_release(struct task_struct *tsk, struct mm_struct *mm)
487 { 487 {
488 struct completion *vfork_done = tsk->vfork_done; 488 struct completion *vfork_done = tsk->vfork_done;
489 489
490 /* Get rid of any cached register state */ 490 /* Get rid of any cached register state */
491 deactivate_mm(tsk, mm); 491 deactivate_mm(tsk, mm);
492 492
493 /* notify parent sleeping on vfork() */ 493 /* notify parent sleeping on vfork() */
494 if (vfork_done) { 494 if (vfork_done) {
495 tsk->vfork_done = NULL; 495 tsk->vfork_done = NULL;
496 complete(vfork_done); 496 complete(vfork_done);
497 } 497 }
498 498
499 /* 499 /*
500 * If we're exiting normally, clear a user-space tid field if 500 * If we're exiting normally, clear a user-space tid field if
501 * requested. We leave this alone when dying by signal, to leave 501 * requested. We leave this alone when dying by signal, to leave
502 * the value intact in a core dump, and to save the unnecessary 502 * the value intact in a core dump, and to save the unnecessary
503 * trouble otherwise. Userland only wants this done for a sys_exit. 503 * trouble otherwise. Userland only wants this done for a sys_exit.
504 */ 504 */
505 if (tsk->clear_child_tid 505 if (tsk->clear_child_tid
506 && !(tsk->flags & PF_SIGNALED) 506 && !(tsk->flags & PF_SIGNALED)
507 && atomic_read(&mm->mm_users) > 1) { 507 && atomic_read(&mm->mm_users) > 1) {
508 u32 __user * tidptr = tsk->clear_child_tid; 508 u32 __user * tidptr = tsk->clear_child_tid;
509 tsk->clear_child_tid = NULL; 509 tsk->clear_child_tid = NULL;
510 510
511 /* 511 /*
512 * We don't check the error code - if userspace has 512 * We don't check the error code - if userspace has
513 * not set up a proper pointer then tough luck. 513 * not set up a proper pointer then tough luck.
514 */ 514 */
515 put_user(0, tidptr); 515 put_user(0, tidptr);
516 sys_futex(tidptr, FUTEX_WAKE, 1, NULL, NULL, 0); 516 sys_futex(tidptr, FUTEX_WAKE, 1, NULL, NULL, 0);
517 } 517 }
518 } 518 }
519 519
520 /* 520 /*
521 * Allocate a new mm structure and copy contents from the 521 * Allocate a new mm structure and copy contents from the
522 * mm structure of the passed in task structure. 522 * mm structure of the passed in task structure.
523 */ 523 */
524 struct mm_struct *dup_mm(struct task_struct *tsk) 524 struct mm_struct *dup_mm(struct task_struct *tsk)
525 { 525 {
526 struct mm_struct *mm, *oldmm = current->mm; 526 struct mm_struct *mm, *oldmm = current->mm;
527 int err; 527 int err;
528 528
529 if (!oldmm) 529 if (!oldmm)
530 return NULL; 530 return NULL;
531 531
532 mm = allocate_mm(); 532 mm = allocate_mm();
533 if (!mm) 533 if (!mm)
534 goto fail_nomem; 534 goto fail_nomem;
535 535
536 memcpy(mm, oldmm, sizeof(*mm)); 536 memcpy(mm, oldmm, sizeof(*mm));
537 537
538 /* Initializing for Swap token stuff */ 538 /* Initializing for Swap token stuff */
539 mm->token_priority = 0; 539 mm->token_priority = 0;
540 mm->last_interval = 0; 540 mm->last_interval = 0;
541 541
542 if (!mm_init(mm, tsk)) 542 if (!mm_init(mm, tsk))
543 goto fail_nomem; 543 goto fail_nomem;
544 544
545 if (init_new_context(tsk, mm)) 545 if (init_new_context(tsk, mm))
546 goto fail_nocontext; 546 goto fail_nocontext;
547 547
548 dup_mm_exe_file(oldmm, mm); 548 dup_mm_exe_file(oldmm, mm);
549 549
550 err = dup_mmap(mm, oldmm); 550 err = dup_mmap(mm, oldmm);
551 if (err) 551 if (err)
552 goto free_pt; 552 goto free_pt;
553 553
554 mm->hiwater_rss = get_mm_rss(mm); 554 mm->hiwater_rss = get_mm_rss(mm);
555 mm->hiwater_vm = mm->total_vm; 555 mm->hiwater_vm = mm->total_vm;
556 556
557 return mm; 557 return mm;
558 558
559 free_pt: 559 free_pt:
560 mmput(mm); 560 mmput(mm);
561 561
562 fail_nomem: 562 fail_nomem:
563 return NULL; 563 return NULL;
564 564
565 fail_nocontext: 565 fail_nocontext:
566 /* 566 /*
567 * If init_new_context() failed, we cannot use mmput() to free the mm 567 * If init_new_context() failed, we cannot use mmput() to free the mm
568 * because it calls destroy_context() 568 * because it calls destroy_context()
569 */ 569 */
570 mm_free_pgd(mm); 570 mm_free_pgd(mm);
571 free_mm(mm); 571 free_mm(mm);
572 return NULL; 572 return NULL;
573 } 573 }
574 574
575 static int copy_mm(unsigned long clone_flags, struct task_struct * tsk) 575 static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
576 { 576 {
577 struct mm_struct * mm, *oldmm; 577 struct mm_struct * mm, *oldmm;
578 int retval; 578 int retval;
579 579
580 tsk->min_flt = tsk->maj_flt = 0; 580 tsk->min_flt = tsk->maj_flt = 0;
581 tsk->nvcsw = tsk->nivcsw = 0; 581 tsk->nvcsw = tsk->nivcsw = 0;
582 582
583 tsk->mm = NULL; 583 tsk->mm = NULL;
584 tsk->active_mm = NULL; 584 tsk->active_mm = NULL;
585 585
586 /* 586 /*
587 * Are we cloning a kernel thread? 587 * Are we cloning a kernel thread?
588 * 588 *
589 * We need to steal a active VM for that.. 589 * We need to steal a active VM for that..
590 */ 590 */
591 oldmm = current->mm; 591 oldmm = current->mm;
592 if (!oldmm) 592 if (!oldmm)
593 return 0; 593 return 0;
594 594
595 if (clone_flags & CLONE_VM) { 595 if (clone_flags & CLONE_VM) {
596 atomic_inc(&oldmm->mm_users); 596 atomic_inc(&oldmm->mm_users);
597 mm = oldmm; 597 mm = oldmm;
598 goto good_mm; 598 goto good_mm;
599 } 599 }
600 600
601 retval = -ENOMEM; 601 retval = -ENOMEM;
602 mm = dup_mm(tsk); 602 mm = dup_mm(tsk);
603 if (!mm) 603 if (!mm)
604 goto fail_nomem; 604 goto fail_nomem;
605 605
606 good_mm: 606 good_mm:
607 /* Initializing for Swap token stuff */ 607 /* Initializing for Swap token stuff */
608 mm->token_priority = 0; 608 mm->token_priority = 0;
609 mm->last_interval = 0; 609 mm->last_interval = 0;
610 610
611 tsk->mm = mm; 611 tsk->mm = mm;
612 tsk->active_mm = mm; 612 tsk->active_mm = mm;
613 return 0; 613 return 0;
614 614
615 fail_nomem: 615 fail_nomem:
616 return retval; 616 return retval;
617 } 617 }
618 618
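copy_mm() only shares the address space when CLONE_VM is set; a plain fork() goes through dup_mm() and the child ends up with its own (copy-on-write) copy. A minimal user-space sketch of those semantics, assuming an ordinary POSIX fork(); the variable name and value are arbitrary:

#include <stdio.h>
#include <unistd.h>
#include <sys/wait.h>

static int counter;		/* inherited as a private, copy-on-write page */

int main(void)
{
	pid_t pid = fork();	/* no CLONE_VM, so copy_mm() uses dup_mm() */
	if (pid == 0) {
		counter = 42;	/* touches only the child's copy */
		_exit(0);
	}
	waitpid(pid, NULL, 0);
	printf("parent still sees counter = %d\n", counter);	/* prints 0 */
	return 0;
}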
619 static struct fs_struct *__copy_fs_struct(struct fs_struct *old) 619 static struct fs_struct *__copy_fs_struct(struct fs_struct *old)
620 { 620 {
621 struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL); 621 struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL);
622 /* We don't need to lock fs - think why ;-) */ 622 /* We don't need to lock fs - think why ;-) */
623 if (fs) { 623 if (fs) {
624 atomic_set(&fs->count, 1); 624 atomic_set(&fs->count, 1);
625 rwlock_init(&fs->lock); 625 rwlock_init(&fs->lock);
626 fs->umask = old->umask; 626 fs->umask = old->umask;
627 read_lock(&old->lock); 627 read_lock(&old->lock);
628 fs->root = old->root; 628 fs->root = old->root;
629 path_get(&old->root); 629 path_get(&old->root);
630 fs->pwd = old->pwd; 630 fs->pwd = old->pwd;
631 path_get(&old->pwd); 631 path_get(&old->pwd);
632 if (old->altroot.dentry) { 632 if (old->altroot.dentry) {
633 fs->altroot = old->altroot; 633 fs->altroot = old->altroot;
634 path_get(&old->altroot); 634 path_get(&old->altroot);
635 } else { 635 } else {
636 fs->altroot.mnt = NULL; 636 fs->altroot.mnt = NULL;
637 fs->altroot.dentry = NULL; 637 fs->altroot.dentry = NULL;
638 } 638 }
639 read_unlock(&old->lock); 639 read_unlock(&old->lock);
640 } 640 }
641 return fs; 641 return fs;
642 } 642 }
643 643
644 struct fs_struct *copy_fs_struct(struct fs_struct *old) 644 struct fs_struct *copy_fs_struct(struct fs_struct *old)
645 { 645 {
646 return __copy_fs_struct(old); 646 return __copy_fs_struct(old);
647 } 647 }
648 648
649 EXPORT_SYMBOL_GPL(copy_fs_struct); 649 EXPORT_SYMBOL_GPL(copy_fs_struct);
650 650
651 static int copy_fs(unsigned long clone_flags, struct task_struct *tsk) 651 static int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
652 { 652 {
653 if (clone_flags & CLONE_FS) { 653 if (clone_flags & CLONE_FS) {
654 atomic_inc(&current->fs->count); 654 atomic_inc(&current->fs->count);
655 return 0; 655 return 0;
656 } 656 }
657 tsk->fs = __copy_fs_struct(current->fs); 657 tsk->fs = __copy_fs_struct(current->fs);
658 if (!tsk->fs) 658 if (!tsk->fs)
659 return -ENOMEM; 659 return -ENOMEM;
660 return 0; 660 return 0;
661 } 661 }
662 662
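Without CLONE_FS, copy_fs() above hands the child its own fs_struct, so a chdir() in the child leaves the parent's working directory untouched. A short sketch under that assumption; /tmp is just an example directory:

#include <stdio.h>
#include <unistd.h>
#include <limits.h>
#include <sys/wait.h>

int main(void)
{
	char buf[PATH_MAX];

	pid_t pid = fork();	/* no CLONE_FS: __copy_fs_struct() makes a copy */
	if (pid == 0) {
		chdir("/tmp");	/* updates only the child's ->pwd */
		_exit(0);
	}
	waitpid(pid, NULL, 0);
	/* The parent's cwd is unchanged by the child's chdir(). */
	printf("parent cwd: %s\n", getcwd(buf, sizeof(buf)));
	return 0;
}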
663 static int count_open_files(struct fdtable *fdt)
664 {
665 int size = fdt->max_fds;
666 int i;
667
668 /* Find the last open fd */
669 for (i = size/(8*sizeof(long)); i > 0; ) {
670 if (fdt->open_fds->fds_bits[--i])
671 break;
672 }
673 i = (i+1) * 8 * sizeof(long);
674 return i;
675 }
676
677 static struct files_struct *alloc_files(void)
678 {
679 struct files_struct *newf;
680 struct fdtable *fdt;
681
682 newf = kmem_cache_alloc(files_cachep, GFP_KERNEL);
683 if (!newf)
684 goto out;
685
686 atomic_set(&newf->count, 1);
687
688 spin_lock_init(&newf->file_lock);
689 newf->next_fd = 0;
690 fdt = &newf->fdtab;
691 fdt->max_fds = NR_OPEN_DEFAULT;
692 fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init;
693 fdt->open_fds = (fd_set *)&newf->open_fds_init;
694 fdt->fd = &newf->fd_array[0];
695 INIT_RCU_HEAD(&fdt->rcu);
696 fdt->next = NULL;
697 rcu_assign_pointer(newf->fdt, fdt);
698 out:
699 return newf;
700 }
701
702 /*
703 * Allocate a new files structure and copy contents from the
704 * passed in files structure.
705 * errorp will be valid only when the returned files_struct is NULL.
706 */
707 static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
708 {
709 struct files_struct *newf;
710 struct file **old_fds, **new_fds;
711 int open_files, size, i;
712 struct fdtable *old_fdt, *new_fdt;
713
714 *errorp = -ENOMEM;
715 newf = alloc_files();
716 if (!newf)
717 goto out;
718
719 spin_lock(&oldf->file_lock);
720 old_fdt = files_fdtable(oldf);
721 new_fdt = files_fdtable(newf);
722 open_files = count_open_files(old_fdt);
723
724 /*
725 * Check whether we need to allocate a larger fd array and fd set.
726 * Note: we're not a clone task, so the open count won't change.
727 */
728 if (open_files > new_fdt->max_fds) {
729 new_fdt->max_fds = 0;
730 spin_unlock(&oldf->file_lock);
731 spin_lock(&newf->file_lock);
732 *errorp = expand_files(newf, open_files-1);
733 spin_unlock(&newf->file_lock);
734 if (*errorp < 0)
735 goto out_release;
736 new_fdt = files_fdtable(newf);
737 /*
738 * Reacquire the oldf lock and a pointer to its fd table;
739 * it may have been replaced by a bigger one in the meantime,
740 * so we need the latest pointer.
741 */
742 spin_lock(&oldf->file_lock);
743 old_fdt = files_fdtable(oldf);
744 }
745
746 old_fds = old_fdt->fd;
747 new_fds = new_fdt->fd;
748
749 memcpy(new_fdt->open_fds->fds_bits,
750 old_fdt->open_fds->fds_bits, open_files/8);
751 memcpy(new_fdt->close_on_exec->fds_bits,
752 old_fdt->close_on_exec->fds_bits, open_files/8);
753
754 for (i = open_files; i != 0; i--) {
755 struct file *f = *old_fds++;
756 if (f) {
757 get_file(f);
758 } else {
759 /*
760 * The fd may be claimed in the fd bitmap but not yet
761 * instantiated in the files array if a sibling thread
762 * is partway through open(). So make sure that this
763 * fd is available to the new process.
764 */
765 FD_CLR(open_files - i, new_fdt->open_fds);
766 }
767 rcu_assign_pointer(*new_fds++, f);
768 }
769 spin_unlock(&oldf->file_lock);
770
771 /* compute the remainder to be cleared */
772 size = (new_fdt->max_fds - open_files) * sizeof(struct file *);
773
774 /* This is long-word aligned, thus could use an optimized version */
775 memset(new_fds, 0, size);
776
777 if (new_fdt->max_fds > open_files) {
778 int left = (new_fdt->max_fds-open_files)/8;
779 int start = open_files / (8 * sizeof(unsigned long));
780
781 memset(&new_fdt->open_fds->fds_bits[start], 0, left);
782 memset(&new_fdt->close_on_exec->fds_bits[start], 0, left);
783 }
784
785 return newf;
786
787 out_release:
788 kmem_cache_free(files_cachep, newf);
789 out:
790 return NULL;
791 }
792
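dup_fd(), the function this commit begins moving to fs/file.c, copies the descriptor table and takes a reference on each file with get_file(); the open file descriptions themselves are not copied. So after fork() the child has private slots but shares offsets with the parent. A hedged illustration, assuming /etc/hostname is a readable file on the test machine:

#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/wait.h>

int main(void)
{
	int fd = open("/etc/hostname", O_RDONLY);
	if (fd < 0)
		return 1;

	pid_t pid = fork();	/* copy_files() -> dup_fd() copies the table */
	if (pid == 0) {
		/* The child's slot is a copy, but it points at the same
		 * struct file, so the file offset is shared. */
		lseek(fd, 3, SEEK_SET);
		_exit(0);
	}
	waitpid(pid, NULL, 0);
	/* The parent sees the child's seek: only the fd table was duplicated. */
	printf("offset seen by parent: %ld\n", (long)lseek(fd, 0, SEEK_CUR));
	close(fd);
	return 0;
}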
793 static int copy_files(unsigned long clone_flags, struct task_struct * tsk) 663 static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
794 { 664 {
795 struct files_struct *oldf, *newf; 665 struct files_struct *oldf, *newf;
796 int error = 0; 666 int error = 0;
797 667
798 /* 668 /*
799 * A background process may not have any files ... 669 * A background process may not have any files ...
800 */ 670 */
801 oldf = current->files; 671 oldf = current->files;
802 if (!oldf) 672 if (!oldf)
803 goto out; 673 goto out;
804 674
805 if (clone_flags & CLONE_FILES) { 675 if (clone_flags & CLONE_FILES) {
806 atomic_inc(&oldf->count); 676 atomic_inc(&oldf->count);
807 goto out; 677 goto out;
808 } 678 }
809 679
810 newf = dup_fd(oldf, &error); 680 newf = dup_fd(oldf, &error);
811 if (!newf) 681 if (!newf)
812 goto out; 682 goto out;
813 683
814 tsk->files = newf; 684 tsk->files = newf;
815 error = 0; 685 error = 0;
816 out: 686 out:
817 return error; 687 return error;
818 } 688 }
819 689
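When CLONE_FILES is set, copy_files() skips dup_fd() entirely and just bumps the reference count, so both tasks operate on one files_struct. A sketch of the difference using the glibc clone() wrapper; the 1 MiB stack and /dev/null are arbitrary choices for illustration:

#define _GNU_SOURCE
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/wait.h>

static int child_fn(void *arg)
{
	close(*(int *)arg);	/* removes the entry from the shared table */
	return 0;
}

int main(void)
{
	int fd = open("/dev/null", O_WRONLY);
	char *stack = malloc(1024 * 1024);

	/* CLONE_FILES: parent and child share one files_struct, so the
	 * child's close() is immediately visible to the parent. */
	pid_t pid = clone(child_fn, stack + 1024 * 1024,
			  CLONE_FILES | SIGCHLD, &fd);
	waitpid(pid, NULL, 0);

	if (write(fd, "x", 1) < 0)
		perror("write after child's close");	/* fails with EBADF */

	free(stack);
	return 0;
}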
820 static int copy_io(unsigned long clone_flags, struct task_struct *tsk) 690 static int copy_io(unsigned long clone_flags, struct task_struct *tsk)
821 { 691 {
822 #ifdef CONFIG_BLOCK 692 #ifdef CONFIG_BLOCK
823 struct io_context *ioc = current->io_context; 693 struct io_context *ioc = current->io_context;
824 694
825 if (!ioc) 695 if (!ioc)
826 return 0; 696 return 0;
827 /* 697 /*
828 * Share io context with parent, if CLONE_IO is set 698 * Share io context with parent, if CLONE_IO is set
829 */ 699 */
830 if (clone_flags & CLONE_IO) { 700 if (clone_flags & CLONE_IO) {
831 tsk->io_context = ioc_task_link(ioc); 701 tsk->io_context = ioc_task_link(ioc);
832 if (unlikely(!tsk->io_context)) 702 if (unlikely(!tsk->io_context))
833 return -ENOMEM; 703 return -ENOMEM;
834 } else if (ioprio_valid(ioc->ioprio)) { 704 } else if (ioprio_valid(ioc->ioprio)) {
835 tsk->io_context = alloc_io_context(GFP_KERNEL, -1); 705 tsk->io_context = alloc_io_context(GFP_KERNEL, -1);
836 if (unlikely(!tsk->io_context)) 706 if (unlikely(!tsk->io_context))
837 return -ENOMEM; 707 return -ENOMEM;
838 708
839 tsk->io_context->ioprio = ioc->ioprio; 709 tsk->io_context->ioprio = ioc->ioprio;
840 } 710 }
841 #endif 711 #endif
842 return 0; 712 return 0;
843 } 713 }
844 714
845 static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk) 715 static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
846 { 716 {
847 struct sighand_struct *sig; 717 struct sighand_struct *sig;
848 718
849 if (clone_flags & (CLONE_SIGHAND | CLONE_THREAD)) { 719 if (clone_flags & (CLONE_SIGHAND | CLONE_THREAD)) {
850 atomic_inc(&current->sighand->count); 720 atomic_inc(&current->sighand->count);
851 return 0; 721 return 0;
852 } 722 }
853 sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL); 723 sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
854 rcu_assign_pointer(tsk->sighand, sig); 724 rcu_assign_pointer(tsk->sighand, sig);
855 if (!sig) 725 if (!sig)
856 return -ENOMEM; 726 return -ENOMEM;
857 atomic_set(&sig->count, 1); 727 atomic_set(&sig->count, 1);
858 memcpy(sig->action, current->sighand->action, sizeof(sig->action)); 728 memcpy(sig->action, current->sighand->action, sizeof(sig->action));
859 return 0; 729 return 0;
860 } 730 }
861 731
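copy_sighand() memcpy()s the parent's action table into the fresh sighand_struct, which is why handlers installed before fork() are still in force in the child. A minimal sketch; SIGUSR1 and the message are arbitrary:

#include <signal.h>
#include <unistd.h>
#include <sys/wait.h>

static void on_usr1(int sig)
{
	(void)sig;
	write(STDOUT_FILENO, "child caught SIGUSR1\n", 21);
}

int main(void)
{
	/* Installed before fork(): the child inherits a copy of the
	 * action table via copy_sighand(). */
	signal(SIGUSR1, on_usr1);

	pid_t pid = fork();
	if (pid == 0) {
		raise(SIGUSR1);	/* serviced by the inherited handler */
		_exit(0);
	}
	waitpid(pid, NULL, 0);
	return 0;
}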
862 void __cleanup_sighand(struct sighand_struct *sighand) 732 void __cleanup_sighand(struct sighand_struct *sighand)
863 { 733 {
864 if (atomic_dec_and_test(&sighand->count)) 734 if (atomic_dec_and_test(&sighand->count))
865 kmem_cache_free(sighand_cachep, sighand); 735 kmem_cache_free(sighand_cachep, sighand);
866 } 736 }
867 737
868 static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) 738 static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
869 { 739 {
870 struct signal_struct *sig; 740 struct signal_struct *sig;
871 int ret; 741 int ret;
872 742
873 if (clone_flags & CLONE_THREAD) { 743 if (clone_flags & CLONE_THREAD) {
874 atomic_inc(&current->signal->count); 744 atomic_inc(&current->signal->count);
875 atomic_inc(&current->signal->live); 745 atomic_inc(&current->signal->live);
876 return 0; 746 return 0;
877 } 747 }
878 sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL); 748 sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
879 tsk->signal = sig; 749 tsk->signal = sig;
880 if (!sig) 750 if (!sig)
881 return -ENOMEM; 751 return -ENOMEM;
882 752
883 ret = copy_thread_group_keys(tsk); 753 ret = copy_thread_group_keys(tsk);
884 if (ret < 0) { 754 if (ret < 0) {
885 kmem_cache_free(signal_cachep, sig); 755 kmem_cache_free(signal_cachep, sig);
886 return ret; 756 return ret;
887 } 757 }
888 758
889 atomic_set(&sig->count, 1); 759 atomic_set(&sig->count, 1);
890 atomic_set(&sig->live, 1); 760 atomic_set(&sig->live, 1);
891 init_waitqueue_head(&sig->wait_chldexit); 761 init_waitqueue_head(&sig->wait_chldexit);
892 sig->flags = 0; 762 sig->flags = 0;
893 sig->group_exit_code = 0; 763 sig->group_exit_code = 0;
894 sig->group_exit_task = NULL; 764 sig->group_exit_task = NULL;
895 sig->group_stop_count = 0; 765 sig->group_stop_count = 0;
896 sig->curr_target = tsk; 766 sig->curr_target = tsk;
897 init_sigpending(&sig->shared_pending); 767 init_sigpending(&sig->shared_pending);
898 INIT_LIST_HEAD(&sig->posix_timers); 768 INIT_LIST_HEAD(&sig->posix_timers);
899 769
900 hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 770 hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
901 sig->it_real_incr.tv64 = 0; 771 sig->it_real_incr.tv64 = 0;
902 sig->real_timer.function = it_real_fn; 772 sig->real_timer.function = it_real_fn;
903 773
904 sig->it_virt_expires = cputime_zero; 774 sig->it_virt_expires = cputime_zero;
905 sig->it_virt_incr = cputime_zero; 775 sig->it_virt_incr = cputime_zero;
906 sig->it_prof_expires = cputime_zero; 776 sig->it_prof_expires = cputime_zero;
907 sig->it_prof_incr = cputime_zero; 777 sig->it_prof_incr = cputime_zero;
908 778
909 sig->leader = 0; /* session leadership doesn't inherit */ 779 sig->leader = 0; /* session leadership doesn't inherit */
910 sig->tty_old_pgrp = NULL; 780 sig->tty_old_pgrp = NULL;
911 781
912 sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero; 782 sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
913 sig->gtime = cputime_zero; 783 sig->gtime = cputime_zero;
914 sig->cgtime = cputime_zero; 784 sig->cgtime = cputime_zero;
915 sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; 785 sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
916 sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; 786 sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
917 sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; 787 sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
918 sig->sum_sched_runtime = 0; 788 sig->sum_sched_runtime = 0;
919 INIT_LIST_HEAD(&sig->cpu_timers[0]); 789 INIT_LIST_HEAD(&sig->cpu_timers[0]);
920 INIT_LIST_HEAD(&sig->cpu_timers[1]); 790 INIT_LIST_HEAD(&sig->cpu_timers[1]);
921 INIT_LIST_HEAD(&sig->cpu_timers[2]); 791 INIT_LIST_HEAD(&sig->cpu_timers[2]);
922 taskstats_tgid_init(sig); 792 taskstats_tgid_init(sig);
923 793
924 task_lock(current->group_leader); 794 task_lock(current->group_leader);
925 memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim); 795 memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
926 task_unlock(current->group_leader); 796 task_unlock(current->group_leader);
927 797
928 if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) { 798 if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
929 /* 799 /*
930 * New sole thread in the process gets an expiry time 800 * New sole thread in the process gets an expiry time
931 * of the whole CPU time limit. 801 * of the whole CPU time limit.
932 */ 802 */
933 tsk->it_prof_expires = 803 tsk->it_prof_expires =
934 secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur); 804 secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
935 } 805 }
936 acct_init_pacct(&sig->pacct); 806 acct_init_pacct(&sig->pacct);
937 807
938 tty_audit_fork(sig); 808 tty_audit_fork(sig);
939 809
940 return 0; 810 return 0;
941 } 811 }
942 812
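Because a non-CLONE_THREAD child gets a brand-new signal_struct here (it_real_incr zeroed, real_timer freshly initialised, and so on), interval timers are not inherited across fork(). A short sketch of that behaviour; the 5-second one-shot timer is arbitrary:

#include <stdio.h>
#include <unistd.h>
#include <sys/time.h>
#include <sys/wait.h>

int main(void)
{
	struct itimerval it = { {0, 0}, {5, 0} };	/* no interval, fires in 5s */

	setitimer(ITIMER_REAL, &it, NULL);

	pid_t pid = fork();	/* copy_signal() builds a fresh signal_struct */
	if (pid == 0) {
		struct itimerval cur;
		getitimer(ITIMER_REAL, &cur);
		/* Prints 0: the child's signal_struct has no armed timer. */
		printf("child it_value: %ld s\n", (long)cur.it_value.tv_sec);
		fflush(stdout);
		_exit(0);
	}
	waitpid(pid, NULL, 0);
	return 0;
}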
943 void __cleanup_signal(struct signal_struct *sig) 813 void __cleanup_signal(struct signal_struct *sig)
944 { 814 {
945 exit_thread_group_keys(sig); 815 exit_thread_group_keys(sig);
946 kmem_cache_free(signal_cachep, sig); 816 kmem_cache_free(signal_cachep, sig);
947 } 817 }
948 818
949 static void cleanup_signal(struct task_struct *tsk) 819 static void cleanup_signal(struct task_struct *tsk)
950 { 820 {
951 struct signal_struct *sig = tsk->signal; 821 struct signal_struct *sig = tsk->signal;
952 822
953 atomic_dec(&sig->live); 823 atomic_dec(&sig->live);
954 824
955 if (atomic_dec_and_test(&sig->count)) 825 if (atomic_dec_and_test(&sig->count))
956 __cleanup_signal(sig); 826 __cleanup_signal(sig);
957 } 827 }
958 828
959 static void copy_flags(unsigned long clone_flags, struct task_struct *p) 829 static void copy_flags(unsigned long clone_flags, struct task_struct *p)
960 { 830 {
961 unsigned long new_flags = p->flags; 831 unsigned long new_flags = p->flags;
962 832
963 new_flags &= ~PF_SUPERPRIV; 833 new_flags &= ~PF_SUPERPRIV;
964 new_flags |= PF_FORKNOEXEC; 834 new_flags |= PF_FORKNOEXEC;
965 if (!(clone_flags & CLONE_PTRACE)) 835 if (!(clone_flags & CLONE_PTRACE))
966 p->ptrace = 0; 836 p->ptrace = 0;
967 p->flags = new_flags; 837 p->flags = new_flags;
968 clear_freeze_flag(p); 838 clear_freeze_flag(p);
969 } 839 }
970 840
971 asmlinkage long sys_set_tid_address(int __user *tidptr) 841 asmlinkage long sys_set_tid_address(int __user *tidptr)
972 { 842 {
973 current->clear_child_tid = tidptr; 843 current->clear_child_tid = tidptr;
974 844
975 return task_pid_vnr(current); 845 return task_pid_vnr(current);
976 } 846 }
977 847
978 static void rt_mutex_init_task(struct task_struct *p) 848 static void rt_mutex_init_task(struct task_struct *p)
979 { 849 {
980 spin_lock_init(&p->pi_lock); 850 spin_lock_init(&p->pi_lock);
981 #ifdef CONFIG_RT_MUTEXES 851 #ifdef CONFIG_RT_MUTEXES
982 plist_head_init(&p->pi_waiters, &p->pi_lock); 852 plist_head_init(&p->pi_waiters, &p->pi_lock);
983 p->pi_blocked_on = NULL; 853 p->pi_blocked_on = NULL;
984 #endif 854 #endif
985 } 855 }
986 856
987 #ifdef CONFIG_MM_OWNER 857 #ifdef CONFIG_MM_OWNER
988 void mm_init_owner(struct mm_struct *mm, struct task_struct *p) 858 void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
989 { 859 {
990 mm->owner = p; 860 mm->owner = p;
991 } 861 }
992 #endif /* CONFIG_MM_OWNER */ 862 #endif /* CONFIG_MM_OWNER */
993 863
994 /* 864 /*
995 * This creates a new process as a copy of the old one, 865 * This creates a new process as a copy of the old one,
996 * but does not actually start it yet. 866 * but does not actually start it yet.
997 * 867 *
998 * It copies the registers, and all the appropriate 868 * It copies the registers, and all the appropriate
999 * parts of the process environment (as per the clone 869 * parts of the process environment (as per the clone
1000 * flags). The actual kick-off is left to the caller. 870 * flags). The actual kick-off is left to the caller.
1001 */ 871 */
1002 static struct task_struct *copy_process(unsigned long clone_flags, 872 static struct task_struct *copy_process(unsigned long clone_flags,
1003 unsigned long stack_start, 873 unsigned long stack_start,
1004 struct pt_regs *regs, 874 struct pt_regs *regs,
1005 unsigned long stack_size, 875 unsigned long stack_size,
1006 int __user *child_tidptr, 876 int __user *child_tidptr,
1007 struct pid *pid) 877 struct pid *pid)
1008 { 878 {
1009 int retval; 879 int retval;
1010 struct task_struct *p; 880 struct task_struct *p;
1011 int cgroup_callbacks_done = 0; 881 int cgroup_callbacks_done = 0;
1012 882
1013 if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) 883 if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
1014 return ERR_PTR(-EINVAL); 884 return ERR_PTR(-EINVAL);
1015 885
1016 /* 886 /*
1017 * Thread groups must share signals as well, and detached threads 887 * Thread groups must share signals as well, and detached threads
1018 * can only be started up within the thread group. 888 * can only be started up within the thread group.
1019 */ 889 */
1020 if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND)) 890 if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
1021 return ERR_PTR(-EINVAL); 891 return ERR_PTR(-EINVAL);
1022 892
1023 /* 893 /*
1024 * Shared signal handlers imply shared VM. By way of the above, 894 * Shared signal handlers imply shared VM. By way of the above,
1025 * thread groups also imply shared VM. Blocking this case allows 895 * thread groups also imply shared VM. Blocking this case allows
1026 * for various simplifications in other code. 896 * for various simplifications in other code.
1027 */ 897 */
1028 if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) 898 if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
1029 return ERR_PTR(-EINVAL); 899 return ERR_PTR(-EINVAL);
1030 900
1031 retval = security_task_create(clone_flags); 901 retval = security_task_create(clone_flags);
1032 if (retval) 902 if (retval)
1033 goto fork_out; 903 goto fork_out;
1034 904
1035 retval = -ENOMEM; 905 retval = -ENOMEM;
1036 p = dup_task_struct(current); 906 p = dup_task_struct(current);
1037 if (!p) 907 if (!p)
1038 goto fork_out; 908 goto fork_out;
1039 909
1040 rt_mutex_init_task(p); 910 rt_mutex_init_task(p);
1041 911
1042 #ifdef CONFIG_TRACE_IRQFLAGS 912 #ifdef CONFIG_TRACE_IRQFLAGS
1043 DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); 913 DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
1044 DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); 914 DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
1045 #endif 915 #endif
1046 retval = -EAGAIN; 916 retval = -EAGAIN;
1047 if (atomic_read(&p->user->processes) >= 917 if (atomic_read(&p->user->processes) >=
1048 p->signal->rlim[RLIMIT_NPROC].rlim_cur) { 918 p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
1049 if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && 919 if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
1050 p->user != current->nsproxy->user_ns->root_user) 920 p->user != current->nsproxy->user_ns->root_user)
1051 goto bad_fork_free; 921 goto bad_fork_free;
1052 } 922 }
1053 923
1054 atomic_inc(&p->user->__count); 924 atomic_inc(&p->user->__count);
1055 atomic_inc(&p->user->processes); 925 atomic_inc(&p->user->processes);
1056 get_group_info(p->group_info); 926 get_group_info(p->group_info);
1057 927
1058 /* 928 /*
1059 * If multiple threads are within copy_process(), then this check 929 * If multiple threads are within copy_process(), then this check
1060 * triggers too late. This doesn't hurt, the check is only there 930 * triggers too late. This doesn't hurt, the check is only there
1061 * to stop root fork bombs. 931 * to stop root fork bombs.
1062 */ 932 */
1063 if (nr_threads >= max_threads) 933 if (nr_threads >= max_threads)
1064 goto bad_fork_cleanup_count; 934 goto bad_fork_cleanup_count;
1065 935
1066 if (!try_module_get(task_thread_info(p)->exec_domain->module)) 936 if (!try_module_get(task_thread_info(p)->exec_domain->module))
1067 goto bad_fork_cleanup_count; 937 goto bad_fork_cleanup_count;
1068 938
1069 if (p->binfmt && !try_module_get(p->binfmt->module)) 939 if (p->binfmt && !try_module_get(p->binfmt->module))
1070 goto bad_fork_cleanup_put_domain; 940 goto bad_fork_cleanup_put_domain;
1071 941
1072 p->did_exec = 0; 942 p->did_exec = 0;
1073 delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ 943 delayacct_tsk_init(p); /* Must remain after dup_task_struct() */
1074 copy_flags(clone_flags, p); 944 copy_flags(clone_flags, p);
1075 INIT_LIST_HEAD(&p->children); 945 INIT_LIST_HEAD(&p->children);
1076 INIT_LIST_HEAD(&p->sibling); 946 INIT_LIST_HEAD(&p->sibling);
1077 #ifdef CONFIG_PREEMPT_RCU 947 #ifdef CONFIG_PREEMPT_RCU
1078 p->rcu_read_lock_nesting = 0; 948 p->rcu_read_lock_nesting = 0;
1079 p->rcu_flipctr_idx = 0; 949 p->rcu_flipctr_idx = 0;
1080 #endif /* #ifdef CONFIG_PREEMPT_RCU */ 950 #endif /* #ifdef CONFIG_PREEMPT_RCU */
1081 p->vfork_done = NULL; 951 p->vfork_done = NULL;
1082 spin_lock_init(&p->alloc_lock); 952 spin_lock_init(&p->alloc_lock);
1083 953
1084 clear_tsk_thread_flag(p, TIF_SIGPENDING); 954 clear_tsk_thread_flag(p, TIF_SIGPENDING);
1085 init_sigpending(&p->pending); 955 init_sigpending(&p->pending);
1086 956
1087 p->utime = cputime_zero; 957 p->utime = cputime_zero;
1088 p->stime = cputime_zero; 958 p->stime = cputime_zero;
1089 p->gtime = cputime_zero; 959 p->gtime = cputime_zero;
1090 p->utimescaled = cputime_zero; 960 p->utimescaled = cputime_zero;
1091 p->stimescaled = cputime_zero; 961 p->stimescaled = cputime_zero;
1092 p->prev_utime = cputime_zero; 962 p->prev_utime = cputime_zero;
1093 p->prev_stime = cputime_zero; 963 p->prev_stime = cputime_zero;
1094 964
1095 #ifdef CONFIG_DETECT_SOFTLOCKUP 965 #ifdef CONFIG_DETECT_SOFTLOCKUP
1096 p->last_switch_count = 0; 966 p->last_switch_count = 0;
1097 p->last_switch_timestamp = 0; 967 p->last_switch_timestamp = 0;
1098 #endif 968 #endif
1099 969
1100 #ifdef CONFIG_TASK_XACCT 970 #ifdef CONFIG_TASK_XACCT
1101 p->rchar = 0; /* I/O counter: bytes read */ 971 p->rchar = 0; /* I/O counter: bytes read */
1102 p->wchar = 0; /* I/O counter: bytes written */ 972 p->wchar = 0; /* I/O counter: bytes written */
1103 p->syscr = 0; /* I/O counter: read syscalls */ 973 p->syscr = 0; /* I/O counter: read syscalls */
1104 p->syscw = 0; /* I/O counter: write syscalls */ 974 p->syscw = 0; /* I/O counter: write syscalls */
1105 #endif 975 #endif
1106 task_io_accounting_init(p); 976 task_io_accounting_init(p);
1107 acct_clear_integrals(p); 977 acct_clear_integrals(p);
1108 978
1109 p->it_virt_expires = cputime_zero; 979 p->it_virt_expires = cputime_zero;
1110 p->it_prof_expires = cputime_zero; 980 p->it_prof_expires = cputime_zero;
1111 p->it_sched_expires = 0; 981 p->it_sched_expires = 0;
1112 INIT_LIST_HEAD(&p->cpu_timers[0]); 982 INIT_LIST_HEAD(&p->cpu_timers[0]);
1113 INIT_LIST_HEAD(&p->cpu_timers[1]); 983 INIT_LIST_HEAD(&p->cpu_timers[1]);
1114 INIT_LIST_HEAD(&p->cpu_timers[2]); 984 INIT_LIST_HEAD(&p->cpu_timers[2]);
1115 985
1116 p->lock_depth = -1; /* -1 = no lock */ 986 p->lock_depth = -1; /* -1 = no lock */
1117 do_posix_clock_monotonic_gettime(&p->start_time); 987 do_posix_clock_monotonic_gettime(&p->start_time);
1118 p->real_start_time = p->start_time; 988 p->real_start_time = p->start_time;
1119 monotonic_to_bootbased(&p->real_start_time); 989 monotonic_to_bootbased(&p->real_start_time);
1120 #ifdef CONFIG_SECURITY 990 #ifdef CONFIG_SECURITY
1121 p->security = NULL; 991 p->security = NULL;
1122 #endif 992 #endif
1123 p->cap_bset = current->cap_bset; 993 p->cap_bset = current->cap_bset;
1124 p->io_context = NULL; 994 p->io_context = NULL;
1125 p->audit_context = NULL; 995 p->audit_context = NULL;
1126 cgroup_fork(p); 996 cgroup_fork(p);
1127 #ifdef CONFIG_NUMA 997 #ifdef CONFIG_NUMA
1128 p->mempolicy = mpol_dup(p->mempolicy); 998 p->mempolicy = mpol_dup(p->mempolicy);
1129 if (IS_ERR(p->mempolicy)) { 999 if (IS_ERR(p->mempolicy)) {
1130 retval = PTR_ERR(p->mempolicy); 1000 retval = PTR_ERR(p->mempolicy);
1131 p->mempolicy = NULL; 1001 p->mempolicy = NULL;
1132 goto bad_fork_cleanup_cgroup; 1002 goto bad_fork_cleanup_cgroup;
1133 } 1003 }
1134 mpol_fix_fork_child_flag(p); 1004 mpol_fix_fork_child_flag(p);
1135 #endif 1005 #endif
1136 #ifdef CONFIG_TRACE_IRQFLAGS 1006 #ifdef CONFIG_TRACE_IRQFLAGS
1137 p->irq_events = 0; 1007 p->irq_events = 0;
1138 #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW 1008 #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
1139 p->hardirqs_enabled = 1; 1009 p->hardirqs_enabled = 1;
1140 #else 1010 #else
1141 p->hardirqs_enabled = 0; 1011 p->hardirqs_enabled = 0;
1142 #endif 1012 #endif
1143 p->hardirq_enable_ip = 0; 1013 p->hardirq_enable_ip = 0;
1144 p->hardirq_enable_event = 0; 1014 p->hardirq_enable_event = 0;
1145 p->hardirq_disable_ip = _THIS_IP_; 1015 p->hardirq_disable_ip = _THIS_IP_;
1146 p->hardirq_disable_event = 0; 1016 p->hardirq_disable_event = 0;
1147 p->softirqs_enabled = 1; 1017 p->softirqs_enabled = 1;
1148 p->softirq_enable_ip = _THIS_IP_; 1018 p->softirq_enable_ip = _THIS_IP_;
1149 p->softirq_enable_event = 0; 1019 p->softirq_enable_event = 0;
1150 p->softirq_disable_ip = 0; 1020 p->softirq_disable_ip = 0;
1151 p->softirq_disable_event = 0; 1021 p->softirq_disable_event = 0;
1152 p->hardirq_context = 0; 1022 p->hardirq_context = 0;
1153 p->softirq_context = 0; 1023 p->softirq_context = 0;
1154 #endif 1024 #endif
1155 #ifdef CONFIG_LOCKDEP 1025 #ifdef CONFIG_LOCKDEP
1156 p->lockdep_depth = 0; /* no locks held yet */ 1026 p->lockdep_depth = 0; /* no locks held yet */
1157 p->curr_chain_key = 0; 1027 p->curr_chain_key = 0;
1158 p->lockdep_recursion = 0; 1028 p->lockdep_recursion = 0;
1159 #endif 1029 #endif
1160 1030
1161 #ifdef CONFIG_DEBUG_MUTEXES 1031 #ifdef CONFIG_DEBUG_MUTEXES
1162 p->blocked_on = NULL; /* not blocked yet */ 1032 p->blocked_on = NULL; /* not blocked yet */
1163 #endif 1033 #endif
1164 1034
1165 /* Perform scheduler related setup. Assign this task to a CPU. */ 1035 /* Perform scheduler related setup. Assign this task to a CPU. */
1166 sched_fork(p, clone_flags); 1036 sched_fork(p, clone_flags);
1167 1037
1168 if ((retval = security_task_alloc(p))) 1038 if ((retval = security_task_alloc(p)))
1169 goto bad_fork_cleanup_policy; 1039 goto bad_fork_cleanup_policy;
1170 if ((retval = audit_alloc(p))) 1040 if ((retval = audit_alloc(p)))
1171 goto bad_fork_cleanup_security; 1041 goto bad_fork_cleanup_security;
1172 /* copy all the process information */ 1042 /* copy all the process information */
1173 if ((retval = copy_semundo(clone_flags, p))) 1043 if ((retval = copy_semundo(clone_flags, p)))
1174 goto bad_fork_cleanup_audit; 1044 goto bad_fork_cleanup_audit;
1175 if ((retval = copy_files(clone_flags, p))) 1045 if ((retval = copy_files(clone_flags, p)))
1176 goto bad_fork_cleanup_semundo; 1046 goto bad_fork_cleanup_semundo;
1177 if ((retval = copy_fs(clone_flags, p))) 1047 if ((retval = copy_fs(clone_flags, p)))
1178 goto bad_fork_cleanup_files; 1048 goto bad_fork_cleanup_files;
1179 if ((retval = copy_sighand(clone_flags, p))) 1049 if ((retval = copy_sighand(clone_flags, p)))
1180 goto bad_fork_cleanup_fs; 1050 goto bad_fork_cleanup_fs;
1181 if ((retval = copy_signal(clone_flags, p))) 1051 if ((retval = copy_signal(clone_flags, p)))
1182 goto bad_fork_cleanup_sighand; 1052 goto bad_fork_cleanup_sighand;
1183 if ((retval = copy_mm(clone_flags, p))) 1053 if ((retval = copy_mm(clone_flags, p)))
1184 goto bad_fork_cleanup_signal; 1054 goto bad_fork_cleanup_signal;
1185 if ((retval = copy_keys(clone_flags, p))) 1055 if ((retval = copy_keys(clone_flags, p)))
1186 goto bad_fork_cleanup_mm; 1056 goto bad_fork_cleanup_mm;
1187 if ((retval = copy_namespaces(clone_flags, p))) 1057 if ((retval = copy_namespaces(clone_flags, p)))
1188 goto bad_fork_cleanup_keys; 1058 goto bad_fork_cleanup_keys;
1189 if ((retval = copy_io(clone_flags, p))) 1059 if ((retval = copy_io(clone_flags, p)))
1190 goto bad_fork_cleanup_namespaces; 1060 goto bad_fork_cleanup_namespaces;
1191 retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs); 1061 retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
1192 if (retval) 1062 if (retval)
1193 goto bad_fork_cleanup_io; 1063 goto bad_fork_cleanup_io;
1194 1064
1195 if (pid != &init_struct_pid) { 1065 if (pid != &init_struct_pid) {
1196 retval = -ENOMEM; 1066 retval = -ENOMEM;
1197 pid = alloc_pid(task_active_pid_ns(p)); 1067 pid = alloc_pid(task_active_pid_ns(p));
1198 if (!pid) 1068 if (!pid)
1199 goto bad_fork_cleanup_io; 1069 goto bad_fork_cleanup_io;
1200 1070
1201 if (clone_flags & CLONE_NEWPID) { 1071 if (clone_flags & CLONE_NEWPID) {
1202 retval = pid_ns_prepare_proc(task_active_pid_ns(p)); 1072 retval = pid_ns_prepare_proc(task_active_pid_ns(p));
1203 if (retval < 0) 1073 if (retval < 0)
1204 goto bad_fork_free_pid; 1074 goto bad_fork_free_pid;
1205 } 1075 }
1206 } 1076 }
1207 1077
1208 p->pid = pid_nr(pid); 1078 p->pid = pid_nr(pid);
1209 p->tgid = p->pid; 1079 p->tgid = p->pid;
1210 if (clone_flags & CLONE_THREAD) 1080 if (clone_flags & CLONE_THREAD)
1211 p->tgid = current->tgid; 1081 p->tgid = current->tgid;
1212 1082
1213 p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; 1083 p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
1214 /* 1084 /*
1215 * Clear TID on mm_release()? 1085 * Clear TID on mm_release()?
1216 */ 1086 */
1217 p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL; 1087 p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
1218 #ifdef CONFIG_FUTEX 1088 #ifdef CONFIG_FUTEX
1219 p->robust_list = NULL; 1089 p->robust_list = NULL;
1220 #ifdef CONFIG_COMPAT 1090 #ifdef CONFIG_COMPAT
1221 p->compat_robust_list = NULL; 1091 p->compat_robust_list = NULL;
1222 #endif 1092 #endif
1223 INIT_LIST_HEAD(&p->pi_state_list); 1093 INIT_LIST_HEAD(&p->pi_state_list);
1224 p->pi_state_cache = NULL; 1094 p->pi_state_cache = NULL;
1225 #endif 1095 #endif
1226 /* 1096 /*
1227 * sigaltstack should be cleared when sharing the same VM 1097 * sigaltstack should be cleared when sharing the same VM
1228 */ 1098 */
1229 if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM) 1099 if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM)
1230 p->sas_ss_sp = p->sas_ss_size = 0; 1100 p->sas_ss_sp = p->sas_ss_size = 0;
1231 1101
1232 /* 1102 /*
1233 * Syscall tracing should be turned off in the child regardless 1103 * Syscall tracing should be turned off in the child regardless
1234 * of CLONE_PTRACE. 1104 * of CLONE_PTRACE.
1235 */ 1105 */
1236 clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE); 1106 clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
1237 #ifdef TIF_SYSCALL_EMU 1107 #ifdef TIF_SYSCALL_EMU
1238 clear_tsk_thread_flag(p, TIF_SYSCALL_EMU); 1108 clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
1239 #endif 1109 #endif
1240 clear_all_latency_tracing(p); 1110 clear_all_latency_tracing(p);
1241 1111
1242 /* Our parent execution domain becomes current domain. 1112 /* Our parent execution domain becomes current domain.
1243 These must match for thread signalling to apply */ 1113 These must match for thread signalling to apply */
1244 p->parent_exec_id = p->self_exec_id; 1114 p->parent_exec_id = p->self_exec_id;
1245 1115
1246 /* ok, now we should be set up.. */ 1116 /* ok, now we should be set up.. */
1247 p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL); 1117 p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL);
1248 p->pdeath_signal = 0; 1118 p->pdeath_signal = 0;
1249 p->exit_state = 0; 1119 p->exit_state = 0;
1250 1120
1251 /* 1121 /*
1252 * Ok, make it visible to the rest of the system. 1122 * Ok, make it visible to the rest of the system.
1253 * We don't wake it up yet. 1123 * We don't wake it up yet.
1254 */ 1124 */
1255 p->group_leader = p; 1125 p->group_leader = p;
1256 INIT_LIST_HEAD(&p->thread_group); 1126 INIT_LIST_HEAD(&p->thread_group);
1257 INIT_LIST_HEAD(&p->ptrace_children); 1127 INIT_LIST_HEAD(&p->ptrace_children);
1258 INIT_LIST_HEAD(&p->ptrace_list); 1128 INIT_LIST_HEAD(&p->ptrace_list);
1259 1129
1260 /* Now that the task is set up, run cgroup callbacks if 1130 /* Now that the task is set up, run cgroup callbacks if
1261 * necessary. We need to run them before the task is visible 1131 * necessary. We need to run them before the task is visible
1262 * on the tasklist. */ 1132 * on the tasklist. */
1263 cgroup_fork_callbacks(p); 1133 cgroup_fork_callbacks(p);
1264 cgroup_callbacks_done = 1; 1134 cgroup_callbacks_done = 1;
1265 1135
1266 /* Need tasklist lock for parent etc handling! */ 1136 /* Need tasklist lock for parent etc handling! */
1267 write_lock_irq(&tasklist_lock); 1137 write_lock_irq(&tasklist_lock);
1268 1138
1269 /* 1139 /*
1270 * The task hasn't been attached yet, so its cpus_allowed mask will 1140 * The task hasn't been attached yet, so its cpus_allowed mask will
1271 * not be changed, nor will its assigned CPU. 1141 * not be changed, nor will its assigned CPU.
1272 * 1142 *
1273 * The cpus_allowed mask of the parent may have changed after it was 1143 * The cpus_allowed mask of the parent may have changed after it was
1274 * copied first time - so re-copy it here, then check the child's CPU 1144 * copied first time - so re-copy it here, then check the child's CPU
1275 * to ensure it is on a valid CPU (and if not, just force it back to 1145 * to ensure it is on a valid CPU (and if not, just force it back to
1276 * parent's CPU). This avoids a lot of nasty races. 1146 * parent's CPU). This avoids a lot of nasty races.
1277 */ 1147 */
1278 p->cpus_allowed = current->cpus_allowed; 1148 p->cpus_allowed = current->cpus_allowed;
1279 p->rt.nr_cpus_allowed = current->rt.nr_cpus_allowed; 1149 p->rt.nr_cpus_allowed = current->rt.nr_cpus_allowed;
1280 if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) || 1150 if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) ||
1281 !cpu_online(task_cpu(p)))) 1151 !cpu_online(task_cpu(p))))
1282 set_task_cpu(p, smp_processor_id()); 1152 set_task_cpu(p, smp_processor_id());
1283 1153
1284 /* CLONE_PARENT re-uses the old parent */ 1154 /* CLONE_PARENT re-uses the old parent */
1285 if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) 1155 if (clone_flags & (CLONE_PARENT|CLONE_THREAD))
1286 p->real_parent = current->real_parent; 1156 p->real_parent = current->real_parent;
1287 else 1157 else
1288 p->real_parent = current; 1158 p->real_parent = current;
1289 p->parent = p->real_parent; 1159 p->parent = p->real_parent;
1290 1160
1291 spin_lock(&current->sighand->siglock); 1161 spin_lock(&current->sighand->siglock);
1292 1162
1293 /* 1163 /*
1294 * Process group and session signals need to be delivered to just the 1164 * Process group and session signals need to be delivered to just the
1295 * parent before the fork or both the parent and the child after the 1165 * parent before the fork or both the parent and the child after the
1296 * fork. Restart if a signal comes in before we add the new process to 1166 * fork. Restart if a signal comes in before we add the new process to
1297 * its process group. 1167 * its process group.
1298 * A fatal signal pending means that current will exit, so the new 1168 * A fatal signal pending means that current will exit, so the new
1299 * thread can't slip out of an OOM kill (or normal SIGKILL). 1169 * thread can't slip out of an OOM kill (or normal SIGKILL).
1300 */ 1170 */
1301 recalc_sigpending(); 1171 recalc_sigpending();
1302 if (signal_pending(current)) { 1172 if (signal_pending(current)) {
1303 spin_unlock(&current->sighand->siglock); 1173 spin_unlock(&current->sighand->siglock);
1304 write_unlock_irq(&tasklist_lock); 1174 write_unlock_irq(&tasklist_lock);
1305 retval = -ERESTARTNOINTR; 1175 retval = -ERESTARTNOINTR;
1306 goto bad_fork_free_pid; 1176 goto bad_fork_free_pid;
1307 } 1177 }
1308 1178
1309 if (clone_flags & CLONE_THREAD) { 1179 if (clone_flags & CLONE_THREAD) {
1310 p->group_leader = current->group_leader; 1180 p->group_leader = current->group_leader;
1311 list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); 1181 list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
1312 1182
1313 if (!cputime_eq(current->signal->it_virt_expires, 1183 if (!cputime_eq(current->signal->it_virt_expires,
1314 cputime_zero) || 1184 cputime_zero) ||
1315 !cputime_eq(current->signal->it_prof_expires, 1185 !cputime_eq(current->signal->it_prof_expires,
1316 cputime_zero) || 1186 cputime_zero) ||
1317 current->signal->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY || 1187 current->signal->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY ||
1318 !list_empty(&current->signal->cpu_timers[0]) || 1188 !list_empty(&current->signal->cpu_timers[0]) ||
1319 !list_empty(&current->signal->cpu_timers[1]) || 1189 !list_empty(&current->signal->cpu_timers[1]) ||
1320 !list_empty(&current->signal->cpu_timers[2])) { 1190 !list_empty(&current->signal->cpu_timers[2])) {
1321 /* 1191 /*
1322 * Have child wake up on its first tick to check 1192 * Have child wake up on its first tick to check
1323 * for process CPU timers. 1193 * for process CPU timers.
1324 */ 1194 */
1325 p->it_prof_expires = jiffies_to_cputime(1); 1195 p->it_prof_expires = jiffies_to_cputime(1);
1326 } 1196 }
1327 } 1197 }
1328 1198
1329 if (likely(p->pid)) { 1199 if (likely(p->pid)) {
1330 add_parent(p); 1200 add_parent(p);
1331 if (unlikely(p->ptrace & PT_PTRACED)) 1201 if (unlikely(p->ptrace & PT_PTRACED))
1332 __ptrace_link(p, current->parent); 1202 __ptrace_link(p, current->parent);
1333 1203
1334 if (thread_group_leader(p)) { 1204 if (thread_group_leader(p)) {
1335 if (clone_flags & CLONE_NEWPID) 1205 if (clone_flags & CLONE_NEWPID)
1336 p->nsproxy->pid_ns->child_reaper = p; 1206 p->nsproxy->pid_ns->child_reaper = p;
1337 1207
1338 p->signal->leader_pid = pid; 1208 p->signal->leader_pid = pid;
1339 p->signal->tty = current->signal->tty; 1209 p->signal->tty = current->signal->tty;
1340 set_task_pgrp(p, task_pgrp_nr(current)); 1210 set_task_pgrp(p, task_pgrp_nr(current));
1341 set_task_session(p, task_session_nr(current)); 1211 set_task_session(p, task_session_nr(current));
1342 attach_pid(p, PIDTYPE_PGID, task_pgrp(current)); 1212 attach_pid(p, PIDTYPE_PGID, task_pgrp(current));
1343 attach_pid(p, PIDTYPE_SID, task_session(current)); 1213 attach_pid(p, PIDTYPE_SID, task_session(current));
1344 list_add_tail_rcu(&p->tasks, &init_task.tasks); 1214 list_add_tail_rcu(&p->tasks, &init_task.tasks);
1345 __get_cpu_var(process_counts)++; 1215 __get_cpu_var(process_counts)++;
1346 } 1216 }
1347 attach_pid(p, PIDTYPE_PID, pid); 1217 attach_pid(p, PIDTYPE_PID, pid);
1348 nr_threads++; 1218 nr_threads++;
1349 } 1219 }
1350 1220
1351 total_forks++; 1221 total_forks++;
1352 spin_unlock(&current->sighand->siglock); 1222 spin_unlock(&current->sighand->siglock);
1353 write_unlock_irq(&tasklist_lock); 1223 write_unlock_irq(&tasklist_lock);
1354 proc_fork_connector(p); 1224 proc_fork_connector(p);
1355 cgroup_post_fork(p); 1225 cgroup_post_fork(p);
1356 return p; 1226 return p;
1357 1227
1358 bad_fork_free_pid: 1228 bad_fork_free_pid:
1359 if (pid != &init_struct_pid) 1229 if (pid != &init_struct_pid)
1360 free_pid(pid); 1230 free_pid(pid);
1361 bad_fork_cleanup_io: 1231 bad_fork_cleanup_io:
1362 put_io_context(p->io_context); 1232 put_io_context(p->io_context);
1363 bad_fork_cleanup_namespaces: 1233 bad_fork_cleanup_namespaces:
1364 exit_task_namespaces(p); 1234 exit_task_namespaces(p);
1365 bad_fork_cleanup_keys: 1235 bad_fork_cleanup_keys:
1366 exit_keys(p); 1236 exit_keys(p);
1367 bad_fork_cleanup_mm: 1237 bad_fork_cleanup_mm:
1368 if (p->mm) 1238 if (p->mm)
1369 mmput(p->mm); 1239 mmput(p->mm);
1370 bad_fork_cleanup_signal: 1240 bad_fork_cleanup_signal:
1371 cleanup_signal(p); 1241 cleanup_signal(p);
1372 bad_fork_cleanup_sighand: 1242 bad_fork_cleanup_sighand:
1373 __cleanup_sighand(p->sighand); 1243 __cleanup_sighand(p->sighand);
1374 bad_fork_cleanup_fs: 1244 bad_fork_cleanup_fs:
1375 exit_fs(p); /* blocking */ 1245 exit_fs(p); /* blocking */
1376 bad_fork_cleanup_files: 1246 bad_fork_cleanup_files:
1377 exit_files(p); /* blocking */ 1247 exit_files(p); /* blocking */
1378 bad_fork_cleanup_semundo: 1248 bad_fork_cleanup_semundo:
1379 exit_sem(p); 1249 exit_sem(p);
1380 bad_fork_cleanup_audit: 1250 bad_fork_cleanup_audit:
1381 audit_free(p); 1251 audit_free(p);
1382 bad_fork_cleanup_security: 1252 bad_fork_cleanup_security:
1383 security_task_free(p); 1253 security_task_free(p);
1384 bad_fork_cleanup_policy: 1254 bad_fork_cleanup_policy:
1385 #ifdef CONFIG_NUMA 1255 #ifdef CONFIG_NUMA
1386 mpol_put(p->mempolicy); 1256 mpol_put(p->mempolicy);
1387 bad_fork_cleanup_cgroup: 1257 bad_fork_cleanup_cgroup:
1388 #endif 1258 #endif
1389 cgroup_exit(p, cgroup_callbacks_done); 1259 cgroup_exit(p, cgroup_callbacks_done);
1390 delayacct_tsk_free(p); 1260 delayacct_tsk_free(p);
1391 if (p->binfmt) 1261 if (p->binfmt)
1392 module_put(p->binfmt->module); 1262 module_put(p->binfmt->module);
1393 bad_fork_cleanup_put_domain: 1263 bad_fork_cleanup_put_domain:
1394 module_put(task_thread_info(p)->exec_domain->module); 1264 module_put(task_thread_info(p)->exec_domain->module);
1395 bad_fork_cleanup_count: 1265 bad_fork_cleanup_count:
1396 put_group_info(p->group_info); 1266 put_group_info(p->group_info);
1397 atomic_dec(&p->user->processes); 1267 atomic_dec(&p->user->processes);
1398 free_uid(p->user); 1268 free_uid(p->user);
1399 bad_fork_free: 1269 bad_fork_free:
1400 free_task(p); 1270 free_task(p);
1401 fork_out: 1271 fork_out:
1402 return ERR_PTR(retval); 1272 return ERR_PTR(retval);
1403 } 1273 }
1404 1274
1405 noinline struct pt_regs * __cpuinit __attribute__((weak)) idle_regs(struct pt_regs *regs) 1275 noinline struct pt_regs * __cpuinit __attribute__((weak)) idle_regs(struct pt_regs *regs)
1406 { 1276 {
1407 memset(regs, 0, sizeof(struct pt_regs)); 1277 memset(regs, 0, sizeof(struct pt_regs));
1408 return regs; 1278 return regs;
1409 } 1279 }
1410 1280
1411 struct task_struct * __cpuinit fork_idle(int cpu) 1281 struct task_struct * __cpuinit fork_idle(int cpu)
1412 { 1282 {
1413 struct task_struct *task; 1283 struct task_struct *task;
1414 struct pt_regs regs; 1284 struct pt_regs regs;
1415 1285
1416 task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, 1286 task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL,
1417 &init_struct_pid); 1287 &init_struct_pid);
1418 if (!IS_ERR(task)) 1288 if (!IS_ERR(task))
1419 init_idle(task, cpu); 1289 init_idle(task, cpu);
1420 1290
1421 return task; 1291 return task;
1422 } 1292 }
1423 1293
1424 static int fork_traceflag(unsigned clone_flags) 1294 static int fork_traceflag(unsigned clone_flags)
1425 { 1295 {
1426 if (clone_flags & CLONE_UNTRACED) 1296 if (clone_flags & CLONE_UNTRACED)
1427 return 0; 1297 return 0;
1428 else if (clone_flags & CLONE_VFORK) { 1298 else if (clone_flags & CLONE_VFORK) {
1429 if (current->ptrace & PT_TRACE_VFORK) 1299 if (current->ptrace & PT_TRACE_VFORK)
1430 return PTRACE_EVENT_VFORK; 1300 return PTRACE_EVENT_VFORK;
1431 } else if ((clone_flags & CSIGNAL) != SIGCHLD) { 1301 } else if ((clone_flags & CSIGNAL) != SIGCHLD) {
1432 if (current->ptrace & PT_TRACE_CLONE) 1302 if (current->ptrace & PT_TRACE_CLONE)
1433 return PTRACE_EVENT_CLONE; 1303 return PTRACE_EVENT_CLONE;
1434 } else if (current->ptrace & PT_TRACE_FORK) 1304 } else if (current->ptrace & PT_TRACE_FORK)
1435 return PTRACE_EVENT_FORK; 1305 return PTRACE_EVENT_FORK;
1436 1306
1437 return 0; 1307 return 0;
1438 } 1308 }
1439 1309
1440 /* 1310 /*
1441 * Ok, this is the main fork-routine. 1311 * Ok, this is the main fork-routine.
1442 * 1312 *
1443 * It copies the process, and if successful kick-starts 1313 * It copies the process, and if successful kick-starts
1444 * it and waits for it to finish using the VM if required. 1314 * it and waits for it to finish using the VM if required.
1445 */ 1315 */
1446 long do_fork(unsigned long clone_flags, 1316 long do_fork(unsigned long clone_flags,
1447 unsigned long stack_start, 1317 unsigned long stack_start,
1448 struct pt_regs *regs, 1318 struct pt_regs *regs,
1449 unsigned long stack_size, 1319 unsigned long stack_size,
1450 int __user *parent_tidptr, 1320 int __user *parent_tidptr,
1451 int __user *child_tidptr) 1321 int __user *child_tidptr)
1452 { 1322 {
1453 struct task_struct *p; 1323 struct task_struct *p;
1454 int trace = 0; 1324 int trace = 0;
1455 long nr; 1325 long nr;
1456 1326
1457 /* 1327 /*
1458 * We hope to recycle these flags after 2.6.26 1328 * We hope to recycle these flags after 2.6.26
1459 */ 1329 */
1460 if (unlikely(clone_flags & CLONE_STOPPED)) { 1330 if (unlikely(clone_flags & CLONE_STOPPED)) {
1461 static int __read_mostly count = 100; 1331 static int __read_mostly count = 100;
1462 1332
1463 if (count > 0 && printk_ratelimit()) { 1333 if (count > 0 && printk_ratelimit()) {
1464 char comm[TASK_COMM_LEN]; 1334 char comm[TASK_COMM_LEN];
1465 1335
1466 count--; 1336 count--;
1467 printk(KERN_INFO "fork(): process `%s' used deprecated " 1337 printk(KERN_INFO "fork(): process `%s' used deprecated "
1468 "clone flags 0x%lx\n", 1338 "clone flags 0x%lx\n",
1469 get_task_comm(comm, current), 1339 get_task_comm(comm, current),
1470 clone_flags & CLONE_STOPPED); 1340 clone_flags & CLONE_STOPPED);
1471 } 1341 }
1472 } 1342 }
1473 1343
1474 if (unlikely(current->ptrace)) { 1344 if (unlikely(current->ptrace)) {
1475 trace = fork_traceflag (clone_flags); 1345 trace = fork_traceflag (clone_flags);
1476 if (trace) 1346 if (trace)
1477 clone_flags |= CLONE_PTRACE; 1347 clone_flags |= CLONE_PTRACE;
1478 } 1348 }
1479 1349
1480 p = copy_process(clone_flags, stack_start, regs, stack_size, 1350 p = copy_process(clone_flags, stack_start, regs, stack_size,
1481 child_tidptr, NULL); 1351 child_tidptr, NULL);
1482 /* 1352 /*
1483 * Do this prior to waking up the new thread - the thread pointer 1353 * Do this prior to waking up the new thread - the thread pointer
1484 * might get invalid after that point, if the thread exits quickly. 1354 * might get invalid after that point, if the thread exits quickly.
1485 */ 1355 */
1486 if (!IS_ERR(p)) { 1356 if (!IS_ERR(p)) {
1487 struct completion vfork; 1357 struct completion vfork;
1488 1358
1489 nr = task_pid_vnr(p); 1359 nr = task_pid_vnr(p);
1490 1360
1491 if (clone_flags & CLONE_PARENT_SETTID) 1361 if (clone_flags & CLONE_PARENT_SETTID)
1492 put_user(nr, parent_tidptr); 1362 put_user(nr, parent_tidptr);
1493 1363
1494 if (clone_flags & CLONE_VFORK) { 1364 if (clone_flags & CLONE_VFORK) {
1495 p->vfork_done = &vfork; 1365 p->vfork_done = &vfork;
1496 init_completion(&vfork); 1366 init_completion(&vfork);
1497 } 1367 }
1498 1368
1499 if ((p->ptrace & PT_PTRACED) || (clone_flags & CLONE_STOPPED)) { 1369 if ((p->ptrace & PT_PTRACED) || (clone_flags & CLONE_STOPPED)) {
1500 /* 1370 /*
1501 * We'll start up with an immediate SIGSTOP. 1371 * We'll start up with an immediate SIGSTOP.
1502 */ 1372 */
1503 sigaddset(&p->pending.signal, SIGSTOP); 1373 sigaddset(&p->pending.signal, SIGSTOP);
1504 set_tsk_thread_flag(p, TIF_SIGPENDING); 1374 set_tsk_thread_flag(p, TIF_SIGPENDING);
1505 } 1375 }
1506 1376
1507 if (!(clone_flags & CLONE_STOPPED)) 1377 if (!(clone_flags & CLONE_STOPPED))
1508 wake_up_new_task(p, clone_flags); 1378 wake_up_new_task(p, clone_flags);
1509 else 1379 else
1510 __set_task_state(p, TASK_STOPPED); 1380 __set_task_state(p, TASK_STOPPED);
1511 1381
1512 if (unlikely (trace)) { 1382 if (unlikely (trace)) {
1513 current->ptrace_message = nr; 1383 current->ptrace_message = nr;
1514 ptrace_notify ((trace << 8) | SIGTRAP); 1384 ptrace_notify ((trace << 8) | SIGTRAP);
1515 } 1385 }
1516 1386
1517 if (clone_flags & CLONE_VFORK) { 1387 if (clone_flags & CLONE_VFORK) {
1518 freezer_do_not_count(); 1388 freezer_do_not_count();
1519 wait_for_completion(&vfork); 1389 wait_for_completion(&vfork);
1520 freezer_count(); 1390 freezer_count();
1521 if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE)) { 1391 if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE)) {
1522 current->ptrace_message = nr; 1392 current->ptrace_message = nr;
1523 ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP); 1393 ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP);
1524 } 1394 }
1525 } 1395 }
1526 } else { 1396 } else {
1527 nr = PTR_ERR(p); 1397 nr = PTR_ERR(p);
1528 } 1398 }
1529 return nr; 1399 return nr;
1530 } 1400 }
1531 1401
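In the CLONE_VFORK branch above, do_fork() sleeps on the vfork completion until the child releases its mm, which is the ordering vfork(2) and posix_spawn() rely on. A sketch using the glibc clone() wrapper so the child can safely run on its own stack; the 1 MiB stack and the one-second sleep are arbitrary:

#define _GNU_SOURCE
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/wait.h>

static int child_fn(void *arg)
{
	(void)arg;
	sleep(1);	/* the parent is still blocked inside clone() here */
	return 0;	/* exiting completes vfork_done; the parent resumes */
}

int main(void)
{
	char *stack = malloc(1024 * 1024);

	printf("parent: calling clone(CLONE_VM | CLONE_VFORK)\n");
	fflush(stdout);

	/* do_fork() waits on the completion, so clone() does not return
	 * to the parent until child_fn() has finished. */
	pid_t pid = clone(child_fn, stack + 1024 * 1024,
			  CLONE_VM | CLONE_VFORK | SIGCHLD, NULL);

	printf("parent: resumed roughly one second later\n");
	waitpid(pid, NULL, 0);
	free(stack);
	return 0;
}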
1532 #ifndef ARCH_MIN_MMSTRUCT_ALIGN 1402 #ifndef ARCH_MIN_MMSTRUCT_ALIGN
1533 #define ARCH_MIN_MMSTRUCT_ALIGN 0 1403 #define ARCH_MIN_MMSTRUCT_ALIGN 0
1534 #endif 1404 #endif
1535 1405
1536 static void sighand_ctor(struct kmem_cache *cachep, void *data) 1406 static void sighand_ctor(struct kmem_cache *cachep, void *data)
1537 { 1407 {
1538 struct sighand_struct *sighand = data; 1408 struct sighand_struct *sighand = data;
1539 1409
1540 spin_lock_init(&sighand->siglock); 1410 spin_lock_init(&sighand->siglock);
1541 init_waitqueue_head(&sighand->signalfd_wqh); 1411 init_waitqueue_head(&sighand->signalfd_wqh);
1542 } 1412 }
1543 1413
1544 void __init proc_caches_init(void) 1414 void __init proc_caches_init(void)
1545 { 1415 {
1546 sighand_cachep = kmem_cache_create("sighand_cache", 1416 sighand_cachep = kmem_cache_create("sighand_cache",
1547 sizeof(struct sighand_struct), 0, 1417 sizeof(struct sighand_struct), 0,
1548 SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU, 1418 SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU,
1549 sighand_ctor); 1419 sighand_ctor);
1550 signal_cachep = kmem_cache_create("signal_cache", 1420 signal_cachep = kmem_cache_create("signal_cache",
1551 sizeof(struct signal_struct), 0, 1421 sizeof(struct signal_struct), 0,
1552 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); 1422 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
1553 files_cachep = kmem_cache_create("files_cache", 1423 files_cachep = kmem_cache_create("files_cache",
1554 sizeof(struct files_struct), 0, 1424 sizeof(struct files_struct), 0,
1555 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); 1425 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
1556 fs_cachep = kmem_cache_create("fs_cache", 1426 fs_cachep = kmem_cache_create("fs_cache",
1557 sizeof(struct fs_struct), 0, 1427 sizeof(struct fs_struct), 0,
1558 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); 1428 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
1559 vm_area_cachep = kmem_cache_create("vm_area_struct", 1429 vm_area_cachep = kmem_cache_create("vm_area_struct",
1560 sizeof(struct vm_area_struct), 0, 1430 sizeof(struct vm_area_struct), 0,
1561 SLAB_PANIC, NULL); 1431 SLAB_PANIC, NULL);
1562 mm_cachep = kmem_cache_create("mm_struct", 1432 mm_cachep = kmem_cache_create("mm_struct",
1563 sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, 1433 sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
1564 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); 1434 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
1565 } 1435 }
1566 1436
1567 /* 1437 /*
1568 * Check constraints on flags passed to the unshare system call and 1438 * Check constraints on flags passed to the unshare system call and
1569 * force unsharing of additional process context as appropriate. 1439 * force unsharing of additional process context as appropriate.
1570 */ 1440 */
1571 static void check_unshare_flags(unsigned long *flags_ptr) 1441 static void check_unshare_flags(unsigned long *flags_ptr)
1572 { 1442 {
1573 /* 1443 /*
1574 * If unsharing a thread from a thread group, must also 1444 * If unsharing a thread from a thread group, must also
1575 * unshare vm. 1445 * unshare vm.
1576 */ 1446 */
1577 if (*flags_ptr & CLONE_THREAD) 1447 if (*flags_ptr & CLONE_THREAD)
1578 *flags_ptr |= CLONE_VM; 1448 *flags_ptr |= CLONE_VM;
1579 1449
1580 /* 1450 /*
1581 * If unsharing vm, must also unshare signal handlers. 1451 * If unsharing vm, must also unshare signal handlers.
1582 */ 1452 */
1583 if (*flags_ptr & CLONE_VM) 1453 if (*flags_ptr & CLONE_VM)
1584 *flags_ptr |= CLONE_SIGHAND; 1454 *flags_ptr |= CLONE_SIGHAND;
1585 1455
1586 /* 1456 /*
1587 * If unsharing signal handlers and the task was created 1457 * If unsharing signal handlers and the task was created
1588 * using CLONE_THREAD, then the thread must be unshared as well 1458 * using CLONE_THREAD, then the thread must be unshared as well
1589 */ 1459 */
1590 if ((*flags_ptr & CLONE_SIGHAND) && 1460 if ((*flags_ptr & CLONE_SIGHAND) &&
1591 (atomic_read(&current->signal->count) > 1)) 1461 (atomic_read(&current->signal->count) > 1))
1592 *flags_ptr |= CLONE_THREAD; 1462 *flags_ptr |= CLONE_THREAD;
1593 1463
1594 /* 1464 /*
1595 * If unsharing namespace, must also unshare filesystem information. 1465 * If unsharing namespace, must also unshare filesystem information.
1596 */ 1466 */
1597 if (*flags_ptr & CLONE_NEWNS) 1467 if (*flags_ptr & CLONE_NEWNS)
1598 *flags_ptr |= CLONE_FS; 1468 *flags_ptr |= CLONE_FS;
1599 } 1469 }
1600 1470
1601 /* 1471 /*
1602 * Unsharing of tasks created with CLONE_THREAD is not supported yet 1472 * Unsharing of tasks created with CLONE_THREAD is not supported yet
1603 */ 1473 */
1604 static int unshare_thread(unsigned long unshare_flags) 1474 static int unshare_thread(unsigned long unshare_flags)
1605 { 1475 {
1606 if (unshare_flags & CLONE_THREAD) 1476 if (unshare_flags & CLONE_THREAD)
1607 return -EINVAL; 1477 return -EINVAL;
1608 1478
1609 return 0; 1479 return 0;
1610 } 1480 }
1611 1481
1612 /* 1482 /*
1613 * Unshare the filesystem structure if it is being shared 1483 * Unshare the filesystem structure if it is being shared
1614 */ 1484 */
1615 static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp) 1485 static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
1616 { 1486 {
1617 struct fs_struct *fs = current->fs; 1487 struct fs_struct *fs = current->fs;
1618 1488
1619 if ((unshare_flags & CLONE_FS) && 1489 if ((unshare_flags & CLONE_FS) &&
1620 (fs && atomic_read(&fs->count) > 1)) { 1490 (fs && atomic_read(&fs->count) > 1)) {
1621 *new_fsp = __copy_fs_struct(current->fs); 1491 *new_fsp = __copy_fs_struct(current->fs);
1622 if (!*new_fsp) 1492 if (!*new_fsp)
1623 return -ENOMEM; 1493 return -ENOMEM;
1624 } 1494 }
1625 1495
1626 return 0; 1496 return 0;
1627 } 1497 }
1628 1498
1629 /* 1499 /*
1630 * Unsharing of sighand is not supported yet 1500 * Unsharing of sighand is not supported yet
1631 */ 1501 */
1632 static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct **new_sighp) 1502 static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct **new_sighp)
1633 { 1503 {
1634 struct sighand_struct *sigh = current->sighand; 1504 struct sighand_struct *sigh = current->sighand;
1635 1505
1636 if ((unshare_flags & CLONE_SIGHAND) && atomic_read(&sigh->count) > 1) 1506 if ((unshare_flags & CLONE_SIGHAND) && atomic_read(&sigh->count) > 1)
1637 return -EINVAL; 1507 return -EINVAL;
1638 else 1508 else
1639 return 0; 1509 return 0;
1640 } 1510 }
1641 1511
1642 /* 1512 /*
1643 * Unshare vm if it is being shared 1513 * Unshare vm if it is being shared
1644 */ 1514 */
1645 static int unshare_vm(unsigned long unshare_flags, struct mm_struct **new_mmp) 1515 static int unshare_vm(unsigned long unshare_flags, struct mm_struct **new_mmp)
1646 { 1516 {
1647 struct mm_struct *mm = current->mm; 1517 struct mm_struct *mm = current->mm;
1648 1518
1649 if ((unshare_flags & CLONE_VM) && 1519 if ((unshare_flags & CLONE_VM) &&
1650 (mm && atomic_read(&mm->mm_users) > 1)) { 1520 (mm && atomic_read(&mm->mm_users) > 1)) {
1651 return -EINVAL; 1521 return -EINVAL;
1652 } 1522 }
1653 1523
1654 return 0; 1524 return 0;
1655 } 1525 }
1656 1526
1657 /* 1527 /*
1658 * Unshare file descriptor table if it is being shared 1528 * Unshare file descriptor table if it is being shared
1659 */ 1529 */
1660 static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp) 1530 static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp)
1661 { 1531 {
1662 struct files_struct *fd = current->files; 1532 struct files_struct *fd = current->files;
1663 int error = 0; 1533 int error = 0;
1664 1534
1665 if ((unshare_flags & CLONE_FILES) && 1535 if ((unshare_flags & CLONE_FILES) &&
1666 (fd && atomic_read(&fd->count) > 1)) { 1536 (fd && atomic_read(&fd->count) > 1)) {
1667 *new_fdp = dup_fd(fd, &error); 1537 *new_fdp = dup_fd(fd, &error);
1668 if (!*new_fdp) 1538 if (!*new_fdp)
1669 return error; 1539 return error;
1670 } 1540 }
1671 1541
1672 return 0; 1542 return 0;
1673 } 1543 }
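
Editor's note: unshare_fd() leans on the calling convention of dup_fd(), the function this series is moving into fs/file.c: the copy comes back as the return value, the error code through a pointer, so a NULL result must be read together with *errorp. A hedged sketch of that convention from a hypothetical caller; my_clone_files() is illustrative and not a kernel function:

/* Hypothetical caller illustrating dup_fd()'s error-pointer convention;
 * assumes dup_fd() is visible to this translation unit. */
static struct files_struct *my_clone_files(struct files_struct *orig, int *errp)
{
        struct files_struct *copy;
        int error = 0;

        copy = dup_fd(orig, &error);    /* NULL on failure, error set */
        if (!copy) {
                *errp = error;          /* e.g. -ENOMEM */
                return NULL;
        }
        *errp = 0;
        return copy;
}
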
1674 1544
1675 /* 1545 /*
1676 * unshare allows a process to 'unshare' part of the process 1546 * unshare allows a process to 'unshare' part of the process
1677 * context which was originally shared using clone. copy_* 1547 * context which was originally shared using clone. copy_*
1678 * functions used by do_fork() cannot be used here directly 1548 * functions used by do_fork() cannot be used here directly
1679 * because they modify an inactive task_struct that is being 1549 * because they modify an inactive task_struct that is being
1680 * constructed. Here we are modifying the current, active, 1550 * constructed. Here we are modifying the current, active,
1681 * task_struct. 1551 * task_struct.
1682 */ 1552 */
1683 asmlinkage long sys_unshare(unsigned long unshare_flags) 1553 asmlinkage long sys_unshare(unsigned long unshare_flags)
1684 { 1554 {
1685 int err = 0; 1555 int err = 0;
1686 struct fs_struct *fs, *new_fs = NULL; 1556 struct fs_struct *fs, *new_fs = NULL;
1687 struct sighand_struct *new_sigh = NULL; 1557 struct sighand_struct *new_sigh = NULL;
1688 struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL; 1558 struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL;
1689 struct files_struct *fd, *new_fd = NULL; 1559 struct files_struct *fd, *new_fd = NULL;
1690 struct nsproxy *new_nsproxy = NULL; 1560 struct nsproxy *new_nsproxy = NULL;
1691 int do_sysvsem = 0; 1561 int do_sysvsem = 0;
1692 1562
1693 check_unshare_flags(&unshare_flags); 1563 check_unshare_flags(&unshare_flags);
1694 1564
1695 /* Return -EINVAL for all unsupported flags */ 1565 /* Return -EINVAL for all unsupported flags */
1696 err = -EINVAL; 1566 err = -EINVAL;
1697 if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| 1567 if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
1698 CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| 1568 CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
1699 CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWUSER| 1569 CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWUSER|
1700 CLONE_NEWNET)) 1570 CLONE_NEWNET))
1701 goto bad_unshare_out; 1571 goto bad_unshare_out;
1702 1572
1703 /* 1573 /*
1704 * CLONE_NEWIPC must also detach from the undolist: after switching 1574 * CLONE_NEWIPC must also detach from the undolist: after switching
1705 * to a new ipc namespace, the semaphore arrays from the old 1575 * to a new ipc namespace, the semaphore arrays from the old
1706 * namespace are unreachable. 1576 * namespace are unreachable.
1707 */ 1577 */
1708 if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM)) 1578 if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM))
1709 do_sysvsem = 1; 1579 do_sysvsem = 1;
1710 if ((err = unshare_thread(unshare_flags))) 1580 if ((err = unshare_thread(unshare_flags)))
1711 goto bad_unshare_out; 1581 goto bad_unshare_out;
1712 if ((err = unshare_fs(unshare_flags, &new_fs))) 1582 if ((err = unshare_fs(unshare_flags, &new_fs)))
1713 goto bad_unshare_cleanup_thread; 1583 goto bad_unshare_cleanup_thread;
1714 if ((err = unshare_sighand(unshare_flags, &new_sigh))) 1584 if ((err = unshare_sighand(unshare_flags, &new_sigh)))
1715 goto bad_unshare_cleanup_fs; 1585 goto bad_unshare_cleanup_fs;
1716 if ((err = unshare_vm(unshare_flags, &new_mm))) 1586 if ((err = unshare_vm(unshare_flags, &new_mm)))
1717 goto bad_unshare_cleanup_sigh; 1587 goto bad_unshare_cleanup_sigh;
1718 if ((err = unshare_fd(unshare_flags, &new_fd))) 1588 if ((err = unshare_fd(unshare_flags, &new_fd)))
1719 goto bad_unshare_cleanup_vm; 1589 goto bad_unshare_cleanup_vm;
1720 if ((err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, 1590 if ((err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy,
1721 new_fs))) 1591 new_fs)))
1722 goto bad_unshare_cleanup_fd; 1592 goto bad_unshare_cleanup_fd;
1723 1593
1724 if (new_fs || new_mm || new_fd || do_sysvsem || new_nsproxy) { 1594 if (new_fs || new_mm || new_fd || do_sysvsem || new_nsproxy) {
1725 if (do_sysvsem) { 1595 if (do_sysvsem) {
1726 /* 1596 /*
1727 * For the semaphore undo list, CLONE_SYSVSEM is equivalent to exiting: detach it. 1597 * For the semaphore undo list, CLONE_SYSVSEM is equivalent to exiting: detach it.
1728 */ 1598 */
1729 exit_sem(current); 1599 exit_sem(current);
1730 } 1600 }
1731 1601
1732 if (new_nsproxy) { 1602 if (new_nsproxy) {
1733 switch_task_namespaces(current, new_nsproxy); 1603 switch_task_namespaces(current, new_nsproxy);
1734 new_nsproxy = NULL; 1604 new_nsproxy = NULL;
1735 } 1605 }
1736 1606
1737 task_lock(current); 1607 task_lock(current);
1738 1608
1739 if (new_fs) { 1609 if (new_fs) {
1740 fs = current->fs; 1610 fs = current->fs;
1741 current->fs = new_fs; 1611 current->fs = new_fs;
1742 new_fs = fs; 1612 new_fs = fs;
1743 } 1613 }
1744 1614
1745 if (new_mm) { 1615 if (new_mm) {
1746 mm = current->mm; 1616 mm = current->mm;
1747 active_mm = current->active_mm; 1617 active_mm = current->active_mm;
1748 current->mm = new_mm; 1618 current->mm = new_mm;
1749 current->active_mm = new_mm; 1619 current->active_mm = new_mm;
1750 activate_mm(active_mm, new_mm); 1620 activate_mm(active_mm, new_mm);
1751 new_mm = mm; 1621 new_mm = mm;
1752 } 1622 }
1753 1623
1754 if (new_fd) { 1624 if (new_fd) {
1755 fd = current->files; 1625 fd = current->files;
1756 current->files = new_fd; 1626 current->files = new_fd;
1757 new_fd = fd; 1627 new_fd = fd;
1758 } 1628 }
1759 1629
1760 task_unlock(current); 1630 task_unlock(current);
1761 } 1631 }
1762 1632
1763 if (new_nsproxy) 1633 if (new_nsproxy)
1764 put_nsproxy(new_nsproxy); 1634 put_nsproxy(new_nsproxy);
1765 1635
1766 bad_unshare_cleanup_fd: 1636 bad_unshare_cleanup_fd:
1767 if (new_fd) 1637 if (new_fd)
1768 put_files_struct(new_fd); 1638 put_files_struct(new_fd);
1769 1639
1770 bad_unshare_cleanup_vm: 1640 bad_unshare_cleanup_vm:
1771 if (new_mm) 1641 if (new_mm)
1772 mmput(new_mm); 1642 mmput(new_mm);
1773 1643
1774 bad_unshare_cleanup_sigh: 1644 bad_unshare_cleanup_sigh:
1775 if (new_sigh) 1645 if (new_sigh)
1776 if (atomic_dec_and_test(&new_sigh->count)) 1646 if (atomic_dec_and_test(&new_sigh->count))
1777 kmem_cache_free(sighand_cachep, new_sigh); 1647 kmem_cache_free(sighand_cachep, new_sigh);
1778 1648
1779 bad_unshare_cleanup_fs: 1649 bad_unshare_cleanup_fs:
1780 if (new_fs) 1650 if (new_fs)
1781 put_fs_struct(new_fs); 1651 put_fs_struct(new_fs);
1782 1652
1783 bad_unshare_cleanup_thread: 1653 bad_unshare_cleanup_thread:
1784 bad_unshare_out: 1654 bad_unshare_out:
1785 return err; 1655 return err;
1786 } 1656 }
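
Editor's note: sys_unshare() is reached from userspace through the unshare(2) wrapper. The sketch below exercises the paths handled above and assumes a glibc that exposes unshare(); it is an illustration, not part of the patch:

#define _GNU_SOURCE
#include <sched.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
        /* Give this process a private fd table and fs_struct
         * (handled by unshare_fd() and unshare_fs() above). */
        if (unshare(CLONE_FILES | CLONE_FS) == -1) {
                fprintf(stderr, "unshare: %s\n", strerror(errno));
                return 1;
        }
        /* Note: unshare(CLONE_VM) or unshare(CLONE_SIGHAND) would fail with
         * EINVAL if the mm or sighand is actually shared, as unshare_vm()
         * and unshare_sighand() above reject those cases. */
        return 0;
}
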
1787 1657
1788 /* 1658 /*
1789 * Helper to unshare the files of the current task. 1659 * Helper to unshare the files of the current task.
1790 * We don't want to expose copy_files internals to 1660 * We don't want to expose copy_files internals to
1791 * the exec layer of the kernel. 1661 * the exec layer of the kernel.
1792 */ 1662 */
1793 1663
1794 int unshare_files(struct files_struct **displaced) 1664 int unshare_files(struct files_struct **displaced)
1795 { 1665 {
1796 struct task_struct *task = current; 1666 struct task_struct *task = current;
1797 struct files_struct *copy = NULL; 1667 struct files_struct *copy = NULL;
1798 int error; 1668 int error;
1799 1669
1800 error = unshare_fd(CLONE_FILES, &copy); 1670 error = unshare_fd(CLONE_FILES, &copy);
1801 if (error || !copy) { 1671 if (error || !copy) {
1802 *displaced = NULL; 1672 *displaced = NULL;
1803 return error; 1673 return error;
1804 } 1674 }
1805 *displaced = task->files; 1675 *displaced = task->files;
1806 task_lock(task); 1676 task_lock(task);
1807 task->files = copy; 1677 task->files = copy;
1808 task_unlock(task); 1678 task_unlock(task);
1809 return 0; 1679 return 0;
1810 } 1680 }
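
Editor's note: the exec layer is the intended caller of this helper; it swaps in a private descriptor table before loading the new image and drops the displaced one afterwards. A sketch of that calling pattern, modelled on the exec path but not a verbatim copy of fs/exec.c; example_exec_prep() is illustrative only:

/* Sketch of the intended calling pattern for unshare_files(). */
static int example_exec_prep(void)
{
        struct files_struct *displaced;
        int retval;

        retval = unshare_files(&displaced);     /* current gets a private copy */
        if (retval)
                return retval;

        /* ... load the new image using current->files ... */

        if (displaced)                          /* NULL if it was not shared */
                put_files_struct(displaced);    /* drop the old, shared table */
        return 0;
}
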
1811 1681