Commit 9e7814404b77c3e8920bee4277162bf3a7460505

Authored by KAMEZAWA Hiroyuki
Committed by Linus Torvalds
1 parent 3b641bf453

hold task->mempolicy while numa_maps scans.

/proc/<pid>/numa_maps scans vma and show mempolicy under
  mmap_sem. It sometimes accesses task->mempolicy which can
  be freed without mmap_sem and numa_maps can show some
  garbage while scanning.

This patch tries to take reference count of task->mempolicy at reading
numa_maps before calling get_vma_policy(). By this, task->mempolicy
will not be freed until numa_maps reaches its end.

V2->v3
  -  updated comments to be more verbose.
  -  removed task_lock() in numa_maps code.
V1->V2
  -  access task->mempolicy only once and remember it.  Becase kernel/exit.c
     can overwrite it.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: David Rientjes <rientjes@google.com>
Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 2 changed files with 51 additions and 3 deletions Side-by-side Diff

... ... @@ -12,6 +12,7 @@
12 12 #include <linux/sched.h>
13 13 #include <linux/proc_fs.h>
14 14 struct ctl_table_header;
  15 +struct mempolicy;
15 16  
16 17 extern struct proc_dir_entry proc_root;
17 18 #ifdef CONFIG_PROC_SYSCTL
... ... @@ -73,6 +74,9 @@
73 74 struct task_struct *task;
74 75 #ifdef CONFIG_MMU
75 76 struct vm_area_struct *tail_vma;
  77 +#endif
  78 +#ifdef CONFIG_NUMA
  79 + struct mempolicy *task_mempolicy;
76 80 #endif
77 81 };
78 82  
... ... @@ -90,10 +90,55 @@
90 90 seq_printf(m, "%*c", len, ' ');
91 91 }
92 92  
  93 +#ifdef CONFIG_NUMA
  94 +/*
  95 + * These functions are for numa_maps but called in generic **maps seq_file
  96 + * ->start(), ->stop() ops.
  97 + *
  98 + * numa_maps scans all vmas under mmap_sem and checks their mempolicy.
  99 + * Each mempolicy object is controlled by reference counting. The problem here
  100 + * is how to avoid accessing dead mempolicy object.
  101 + *
  102 + * Because we're holding mmap_sem while reading seq_file, it's safe to access
  103 + * each vma's mempolicy, no vma objects will never drop refs to mempolicy.
  104 + *
  105 + * A task's mempolicy (task->mempolicy) has different behavior. task->mempolicy
  106 + * is set and replaced under mmap_sem but unrefed and cleared under task_lock().
  107 + * So, without task_lock(), we cannot trust get_vma_policy() because we cannot
  108 + * gurantee the task never exits under us. But taking task_lock() around
  109 + * get_vma_plicy() causes lock order problem.
  110 + *
  111 + * To access task->mempolicy without lock, we hold a reference count of an
  112 + * object pointed by task->mempolicy and remember it. This will guarantee
  113 + * that task->mempolicy points to an alive object or NULL in numa_maps accesses.
  114 + */
  115 +static void hold_task_mempolicy(struct proc_maps_private *priv)
  116 +{
  117 + struct task_struct *task = priv->task;
  118 +
  119 + task_lock(task);
  120 + priv->task_mempolicy = task->mempolicy;
  121 + mpol_get(priv->task_mempolicy);
  122 + task_unlock(task);
  123 +}
  124 +static void release_task_mempolicy(struct proc_maps_private *priv)
  125 +{
  126 + mpol_put(priv->task_mempolicy);
  127 +}
  128 +#else
  129 +static void hold_task_mempolicy(struct proc_maps_private *priv)
  130 +{
  131 +}
  132 +static void release_task_mempolicy(struct proc_maps_private *priv)
  133 +{
  134 +}
  135 +#endif
  136 +
93 137 static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma)
94 138 {
95 139 if (vma && vma != priv->tail_vma) {
96 140 struct mm_struct *mm = vma->vm_mm;
  141 + release_task_mempolicy(priv);
97 142 up_read(&mm->mmap_sem);
98 143 mmput(mm);
99 144 }
... ... @@ -132,7 +177,7 @@
132 177  
133 178 tail_vma = get_gate_vma(priv->task->mm);
134 179 priv->tail_vma = tail_vma;
135   -
  180 + hold_task_mempolicy(priv);
136 181 /* Start with last addr hint */
137 182 vma = find_vma(mm, last_addr);
138 183 if (last_addr && vma) {
... ... @@ -159,6 +204,7 @@
159 204 if (vma)
160 205 return vma;
161 206  
  207 + release_task_mempolicy(priv);
162 208 /* End of vmas has been reached */
163 209 m->version = (tail_vma != NULL)? 0: -1UL;
164 210 up_read(&mm->mmap_sem);
165 211  
... ... @@ -1178,11 +1224,9 @@
1178 1224 walk.private = md;
1179 1225 walk.mm = mm;
1180 1226  
1181   - task_lock(task);
1182 1227 pol = get_vma_policy(task, vma, vma->vm_start);
1183 1228 mpol_to_str(buffer, sizeof(buffer), pol, 0);
1184 1229 mpol_cond_put(pol);
1185   - task_unlock(task);
1186 1230  
1187 1231 seq_printf(m, "%08lx %s", vma->vm_start, buffer);
1188 1232