Commit b8f00e6be46f4c9a112e05fd692712873c4c4048

Authored by Al Viro
1 parent 9df7fa16ee

acct: new lifetime rules

Do not reuse bsd_acct_struct after closing the damn thing.
Structure lifetime is controlled by refcount now.  We also
have a mutex in there, held over closing and writing (the
file is O_APPEND, so we are not losing any concurrency).

As a result, we do not need to bother with get_file()/fput()
on log write anymore.  Moreover, do_acct_process() only needs
acct itself; file and pidns are picked from it.

Killed instances are distinguished by having NULL ->ns.
Refcount is protected by acct_lock; anybody taking the
mutex needs to grab a reference first.

Things will get a lot simpler in the next commits - this
is just the minimal chunk switching to the new lifetime rules.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

Showing 1 changed file with 114 additions and 106 deletions Side-by-side Diff

... ... @@ -75,15 +75,11 @@
75 75 /*
76 76 * External references and all of the globals.
77 77 */
78   -static void do_acct_process(struct bsd_acct_struct *acct,
79   - struct pid_namespace *ns, struct file *);
  78 +static void do_acct_process(struct bsd_acct_struct *acct);
80 79  
81   -/*
82   - * This structure is used so that all the data protected by lock
83   - * can be placed in the same cache line as the lock. This primes
84   - * the cache line to have the data after getting the lock.
85   - */
86 80 struct bsd_acct_struct {
  81 + long count;
  82 + struct mutex lock;
87 83 int active;
88 84 unsigned long needcheck;
89 85 struct file *file;
90 86  
91 87  
92 88  
93 89  
94 90  
... ... @@ -157,39 +153,59 @@
157 153 return res;
158 154 }
159 155  
160   -/*
161   - * Close the old accounting file (if currently open) and then replace
162   - * it with file (if non-NULL).
163   - *
164   - * NOTE: acct_lock MUST be held on entry and exit.
165   - */
166   -static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file,
167   - struct pid_namespace *ns)
  156 +static void acct_put(struct bsd_acct_struct *p)
168 157 {
169   - struct file *old_acct = NULL;
170   - struct pid_namespace *old_ns = NULL;
  158 + spin_lock(&acct_lock);
  159 + if (!--p->count)
  160 + kfree(p);
  161 + spin_unlock(&acct_lock);
  162 +}
171 163  
172   - if (acct->file) {
173   - old_acct = acct->file;
174   - old_ns = acct->ns;
175   - acct->active = 0;
176   - acct->file = NULL;
177   - acct->ns = NULL;
178   - list_del(&acct->list);
  164 +static struct bsd_acct_struct *acct_get(struct bsd_acct_struct **p)
  165 +{
  166 + struct bsd_acct_struct *res;
  167 + spin_lock(&acct_lock);
  168 +again:
  169 + res = *p;
  170 + if (res)
  171 + res->count++;
  172 + spin_unlock(&acct_lock);
  173 + if (res) {
  174 + mutex_lock(&res->lock);
  175 + if (!res->ns) {
  176 + mutex_unlock(&res->lock);
  177 + spin_lock(&acct_lock);
  178 + if (!--res->count)
  179 + kfree(res);
  180 + goto again;
  181 + }
179 182 }
180   - if (file) {
181   - acct->file = file;
182   - acct->ns = ns;
183   - acct->needcheck = jiffies;
184   - acct->active = 0;
185   - list_add(&acct->list, &acct_list);
186   - }
187   - if (old_acct) {
188   - mnt_unpin(old_acct->f_path.mnt);
  183 + return res;
  184 +}
  185 +
  186 +static void acct_kill(struct bsd_acct_struct *acct,
  187 + struct bsd_acct_struct *new)
  188 +{
  189 + if (acct) {
  190 + struct file *file = acct->file;
  191 + struct pid_namespace *ns = acct->ns;
  192 + spin_lock(&acct_lock);
  193 + list_del(&acct->list);
  194 + mnt_unpin(file->f_path.mnt);
189 195 spin_unlock(&acct_lock);
190   - do_acct_process(acct, old_ns, old_acct);
191   - filp_close(old_acct, NULL);
  196 + do_acct_process(acct);
  197 + filp_close(file, NULL);
192 198 spin_lock(&acct_lock);
  199 + ns->bacct = new;
  200 + if (new) {
  201 + mnt_pin(new->file->f_path.mnt);
  202 + list_add(&new->list, &acct_list);
  203 + }
  204 + acct->ns = NULL;
  205 + mutex_unlock(&acct->lock);
  206 + if (!(acct->count -= 2))
  207 + kfree(acct);
  208 + spin_unlock(&acct_lock);
193 209 }
194 210 }
195 211  
196 212  
197 213  
198 214  
199 215  
200 216  
201 217  
202 218  
203 219  
204 220  
205 221  
... ... @@ -197,47 +213,50 @@
197 213 {
198 214 struct file *file;
199 215 struct vfsmount *mnt;
200   - struct pid_namespace *ns;
201   - struct bsd_acct_struct *acct = NULL;
  216 + struct pid_namespace *ns = task_active_pid_ns(current);
  217 + struct bsd_acct_struct *acct, *old;
202 218  
  219 + acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL);
  220 + if (!acct)
  221 + return -ENOMEM;
  222 +
203 223 /* Difference from BSD - they don't do O_APPEND */
204 224 file = file_open_name(pathname, O_WRONLY|O_APPEND|O_LARGEFILE, 0);
205   - if (IS_ERR(file))
  225 + if (IS_ERR(file)) {
  226 + kfree(acct);
206 227 return PTR_ERR(file);
  228 + }
207 229  
208 230 if (!S_ISREG(file_inode(file)->i_mode)) {
  231 + kfree(acct);
209 232 filp_close(file, NULL);
210 233 return -EACCES;
211 234 }
212 235  
213 236 if (!file->f_op->write) {
  237 + kfree(acct);
214 238 filp_close(file, NULL);
215 239 return -EIO;
216 240 }
217 241  
218   - ns = task_active_pid_ns(current);
219   - if (ns->bacct == NULL) {
220   - acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL);
221   - if (acct == NULL) {
222   - filp_close(file, NULL);
223   - return -ENOMEM;
224   - }
225   - }
  242 + acct->count = 1;
  243 + acct->file = file;
  244 + acct->needcheck = jiffies;
  245 + acct->ns = ns;
  246 + mutex_init(&acct->lock);
  247 + mnt = file->f_path.mnt;
226 248  
227   - spin_lock(&acct_lock);
228   - if (ns->bacct == NULL) {
  249 + old = acct_get(&ns->bacct);
  250 + if (old) {
  251 + acct_kill(old, acct);
  252 + } else {
  253 + spin_lock(&acct_lock);
229 254 ns->bacct = acct;
230   - acct = NULL;
  255 + mnt_pin(mnt);
  256 + list_add(&acct->list, &acct_list);
  257 + spin_unlock(&acct_lock);
231 258 }
232   -
233   - mnt = file->f_path.mnt;
234   - mnt_pin(mnt);
235   - acct_file_reopen(ns->bacct, file, ns);
236   - spin_unlock(&acct_lock);
237   -
238 259 mntput(mnt); /* it's pinned, now give up active reference */
239   - kfree(acct);
240   -
241 260 return 0;
242 261 }
243 262  
... ... @@ -270,15 +289,7 @@
270 289 mutex_unlock(&acct_on_mutex);
271 290 putname(tmp);
272 291 } else {
273   - struct bsd_acct_struct *acct;
274   -
275   - acct = task_active_pid_ns(current)->bacct;
276   - if (acct == NULL)
277   - return 0;
278   -
279   - spin_lock(&acct_lock);
280   - acct_file_reopen(acct, NULL, NULL);
281   - spin_unlock(&acct_lock);
  292 + acct_kill(acct_get(&task_active_pid_ns(current)->bacct), NULL);
282 293 }
283 294  
284 295 return error;
... ... @@ -298,8 +309,19 @@
298 309 spin_lock(&acct_lock);
299 310 restart:
300 311 list_for_each_entry(acct, &acct_list, list)
301   - if (acct->file && acct->file->f_path.mnt == m) {
302   - acct_file_reopen(acct, NULL, NULL);
  312 + if (acct->file->f_path.mnt == m) {
  313 + acct->count++;
  314 + spin_unlock(&acct_lock);
  315 + mutex_lock(&acct->lock);
  316 + if (!acct->ns) {
  317 + mutex_unlock(&acct->lock);
  318 + spin_lock(&acct_lock);
  319 + if (!--acct->count)
  320 + kfree(acct);
  321 + goto restart;
  322 + }
  323 + acct_kill(acct, NULL);
  324 + spin_lock(&acct_lock);
303 325 goto restart;
304 326 }
305 327 spin_unlock(&acct_lock);
... ... @@ -319,8 +341,19 @@
319 341 spin_lock(&acct_lock);
320 342 restart:
321 343 list_for_each_entry(acct, &acct_list, list)
322   - if (acct->file && acct->file->f_path.dentry->d_sb == sb) {
323   - acct_file_reopen(acct, NULL, NULL);
  344 + if (acct->file->f_path.dentry->d_sb == sb) {
  345 + acct->count++;
  346 + spin_unlock(&acct_lock);
  347 + mutex_lock(&acct->lock);
  348 + if (!acct->ns) {
  349 + mutex_unlock(&acct->lock);
  350 + spin_lock(&acct_lock);
  351 + if (!--acct->count)
  352 + kfree(acct);
  353 + goto restart;
  354 + }
  355 + acct_kill(acct, NULL);
  356 + spin_lock(&acct_lock);
324 357 goto restart;
325 358 }
326 359 spin_unlock(&acct_lock);
... ... @@ -328,17 +361,7 @@
328 361  
329 362 void acct_exit_ns(struct pid_namespace *ns)
330 363 {
331   - struct bsd_acct_struct *acct = ns->bacct;
332   -
333   - if (acct == NULL)
334   - return;
335   -
336   - spin_lock(&acct_lock);
337   - if (acct->file != NULL)
338   - acct_file_reopen(acct, NULL, NULL);
339   - spin_unlock(&acct_lock);
340   -
341   - kfree(acct);
  364 + acct_kill(acct_get(&ns->bacct), NULL);
342 365 }
343 366  
344 367 /*
345 368  
... ... @@ -507,12 +530,13 @@
507 530 /*
508 531 * do_acct_process does all actual work. Caller holds the reference to file.
509 532 */
510   -static void do_acct_process(struct bsd_acct_struct *acct,
511   - struct pid_namespace *ns, struct file *file)
  533 +static void do_acct_process(struct bsd_acct_struct *acct)
512 534 {
513 535 acct_t ac;
514 536 unsigned long flim;
515 537 const struct cred *orig_cred;
  538 + struct pid_namespace *ns = acct->ns;
  539 + struct file *file = acct->file;
516 540  
517 541 /*
518 542 * Accounting records are not subject to resource limits.
519 543  
... ... @@ -606,27 +630,12 @@
606 630 static void slow_acct_process(struct pid_namespace *ns)
607 631 {
608 632 for ( ; ns; ns = ns->parent) {
609   - struct file *file = NULL;
610   - struct bsd_acct_struct *acct;
611   -
612   - acct = ns->bacct;
613   - /*
614   - * accelerate the common fastpath:
615   - */
616   - if (!acct || !acct->file)
617   - continue;
618   -
619   - spin_lock(&acct_lock);
620   - file = acct->file;
621   - if (unlikely(!file)) {
622   - spin_unlock(&acct_lock);
623   - continue;
  633 + struct bsd_acct_struct *acct = acct_get(&ns->bacct);
  634 + if (acct) {
  635 + do_acct_process(acct);
  636 + mutex_unlock(&acct->lock);
  637 + acct_put(acct);
624 638 }
625   - get_file(file);
626   - spin_unlock(&acct_lock);
627   -
628   - do_acct_process(acct, ns, file);
629   - fput(file);
630 639 }
631 640 }
632 641  
... ... @@ -645,8 +654,7 @@
645 654 * its parent.
646 655 */
647 656 for (ns = task_active_pid_ns(current); ns != NULL; ns = ns->parent) {
648   - struct bsd_acct_struct *acct = ns->bacct;
649   - if (acct && acct->file)
  657 + if (ns->bacct)
650 658 break;
651 659 }
652 660 if (unlikely(ns))