Commit f7e6ced4061da509f737541ca4dbd44d83a6e82f

Authored by Al Viro
1 parent 734550921e

[PATCH] allow delayed freeing of ctl_table_header

Refcount the sucker; instead of freeing it by the end of unregistration
just drop the refcount and free only when it hits zero.  Make sure that
we _always_ make ->unregistering non-NULL in start_unregistering().

That allows anybody to get a reference to such puppy, preventing its
freeing and reuse.  It does *not* block unregistration.  Anybody who
holds such a reference can
	* try to grab a "use" reference (sysctl_head_grab()); that will
succeed if and only if it hadn't entered unregistration yet.  If it
succeeds, we can use it in all normal ways until we release the "use"
reference (with sysctl_head_finish()).  Note that this relies on having
->unregistering become non-NULL in all cases when one starts to unregister
the sucker.
	* keep pointers to ctl_table entries; they *can* be freed if
the entire thing is unregistered.  However, if sysctl_head_grab() succeeds,
we know that unregistration had not happened (and will not happen until
sysctl_head_finish()) and such pointers can be used safely.

IOW, now we can have inodes under /proc/sys keep references to ctl_table
entries, protecting them with references to ctl_table_header and
grabbing the latter for the duration of operations that require access
to ctl_table.  That won't cause deadlocks, since unregistration will not
be stopped by merely keeping a reference to ctl_table_header.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

Showing 2 changed files with 42 additions and 1 deletion Side-by-side Diff

include/linux/sysctl.h
... ... @@ -957,6 +957,11 @@
957 957 struct ctl_table_set *parent,
958 958 int (*is_seen)(struct ctl_table_set *));
959 959  
  960 +struct ctl_table_header;
  961 +
  962 +extern void sysctl_head_get(struct ctl_table_header *);
  963 +extern void sysctl_head_put(struct ctl_table_header *);
  964 +extern struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *);
960 965 extern struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev);
961 966 extern struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces,
962 967 struct ctl_table_header *prev);
... ... @@ -1073,6 +1078,7 @@
1073 1078 struct ctl_table *ctl_table;
1074 1079 struct list_head ctl_entry;
1075 1080 int used;
  1081 + int count;
1076 1082 struct completion *unregistering;
1077 1083 struct ctl_table *ctl_table_arg;
1078 1084 struct ctl_table_root *root;
... ... @@ -1387,6 +1387,9 @@
1387 1387 spin_unlock(&sysctl_lock);
1388 1388 wait_for_completion(&wait);
1389 1389 spin_lock(&sysctl_lock);
  1390 + } else {
  1391 + /* anything non-NULL; we'll never dereference it */
  1392 + p->unregistering = ERR_PTR(-EINVAL);
1390 1393 }
1391 1394 /*
1392 1395 * do not remove from the list until nobody holds it; walking the
... ... @@ -1395,6 +1398,32 @@
1395 1398 list_del_init(&p->ctl_entry);
1396 1399 }
1397 1400  
  1401 +void sysctl_head_get(struct ctl_table_header *head)
  1402 +{
  1403 + spin_lock(&sysctl_lock);
  1404 + head->count++;
  1405 + spin_unlock(&sysctl_lock);
  1406 +}
  1407 +
  1408 +void sysctl_head_put(struct ctl_table_header *head)
  1409 +{
  1410 + spin_lock(&sysctl_lock);
  1411 + if (!--head->count)
  1412 + kfree(head);
  1413 + spin_unlock(&sysctl_lock);
  1414 +}
  1415 +
  1416 +struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head)
  1417 +{
  1418 + if (!head)
  1419 + BUG();
  1420 + spin_lock(&sysctl_lock);
  1421 + if (!use_table(head))
  1422 + head = ERR_PTR(-ENOENT);
  1423 + spin_unlock(&sysctl_lock);
  1424 + return head;
  1425 +}
  1426 +
1398 1427 void sysctl_head_finish(struct ctl_table_header *head)
1399 1428 {
1400 1429 if (!head)
... ... @@ -1771,6 +1800,7 @@
1771 1800 header->unregistering = NULL;
1772 1801 header->root = root;
1773 1802 sysctl_set_parent(NULL, header->ctl_table);
  1803 + header->count = 1;
1774 1804 #ifdef CONFIG_SYSCTL_SYSCALL_CHECK
1775 1805 if (sysctl_check_table(namespaces, header->ctl_table)) {
1776 1806 kfree(header);
1777 1807  
... ... @@ -1834,8 +1864,9 @@
1834 1864  
1835 1865 spin_lock(&sysctl_lock);
1836 1866 start_unregistering(header);
  1867 + if (!--header->count)
  1868 + kfree(header);
1837 1869 spin_unlock(&sysctl_lock);
1838   - kfree(header);
1839 1870 }
1840 1871  
1841 1872 void setup_sysctl_set(struct ctl_table_set *p,
... ... @@ -1866,6 +1897,10 @@
1866 1897 void setup_sysctl_set(struct ctl_table_set *p,
1867 1898 struct ctl_table_set *parent,
1868 1899 int (*is_seen)(struct ctl_table_set *))
  1900 +{
  1901 +}
  1902 +
  1903 +void sysctl_head_put(struct ctl_table_header *head)
1869 1904 {
1870 1905 }
1871 1906