Commit 272eb01485dda98e3b8910c7c1a53d597616b0a0

Authored by Eric Paris
Committed by Al Viro
1 parent c2acf7b908

filesystem notification: create fs/notify to contain all fs notification

Creating a generic filesystem notification interface, fsnotify, which will be
used by inotify, dnotify, and eventually fanotify, is really starting to
clutter the fs directory.  This patch simply moves inotify and dnotify into
fs/notify/inotify and fs/notify/dnotify respectively, to make both the current
fs/ directory and future notification code tidier.

Signed-off-by: Eric Paris <eparis@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

Showing 14 changed files with 1928 additions and 1924 deletions

fs/Kconfig
... ... @@ -270,44 +270,7 @@
270 270  
271 271 endif # BLOCK
272 272  
273   -config DNOTIFY
274   - bool "Dnotify support"
275   - default y
276   - help
277   - Dnotify is a directory-based per-fd file change notification system
278   - that uses signals to communicate events to user-space. There exist
279   - superior alternatives, but some applications may still rely on
280   - dnotify.
281   -
282   - If unsure, say Y.
283   -
284   -config INOTIFY
285   - bool "Inotify file change notification support"
286   - default y
287   - ---help---
288   - Say Y here to enable inotify support. Inotify is a file change
289   - notification system and a replacement for dnotify. Inotify fixes
290   - numerous shortcomings in dnotify and introduces several new features
291   - including multiple file events, one-shot support, and unmount
292   - notification.
293   -
294   - For more information, see <file:Documentation/filesystems/inotify.txt>
295   -
296   - If unsure, say Y.
297   -
298   -config INOTIFY_USER
299   - bool "Inotify support for userspace"
300   - depends on INOTIFY
301   - default y
302   - ---help---
303   - Say Y here to enable inotify support for userspace, including the
304   - associated system calls. Inotify allows monitoring of both files and
305   - directories via a single open fd. Events are read from the file
306   - descriptor, which is also select()- and poll()-able.
307   -
308   - For more information, see <file:Documentation/filesystems/inotify.txt>
309   -
310   - If unsure, say Y.
  273 +source "fs/notify/Kconfig"
311 274  
312 275 config QUOTA
313 276 bool "Quota support"
fs/Makefile
... ... @@ -20,8 +20,7 @@
20 20 endif
21 21  
22 22 obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o
23   -obj-$(CONFIG_INOTIFY) += inotify.o
24   -obj-$(CONFIG_INOTIFY_USER) += inotify_user.o
  23 +obj-y += notify/
25 24 obj-$(CONFIG_EPOLL) += eventpoll.o
26 25 obj-$(CONFIG_ANON_INODES) += anon_inodes.o
27 26 obj-$(CONFIG_SIGNALFD) += signalfd.o
... ... @@ -56,8 +55,6 @@
56 55 obj-$(CONFIG_QFMT_V1) += quota_v1.o
57 56 obj-$(CONFIG_QFMT_V2) += quota_v2.o
58 57 obj-$(CONFIG_QUOTACTL) += quota.o
59   -
60   -obj-$(CONFIG_DNOTIFY) += dnotify.o
61 58  
62 59 obj-$(CONFIG_PROC_FS) += proc/
63 60 obj-y += partitions/
fs/dnotify.c
1   -/*
2   - * Directory notifications for Linux.
3   - *
4   - * Copyright (C) 2000,2001,2002 Stephen Rothwell
5   - *
6   - * This program is free software; you can redistribute it and/or modify it
7   - * under the terms of the GNU General Public License as published by the
8   - * Free Software Foundation; either version 2, or (at your option) any
9   - * later version.
10   - *
11   - * This program is distributed in the hope that it will be useful, but
12   - * WITHOUT ANY WARRANTY; without even the implied warranty of
13   - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14   - * General Public License for more details.
15   - */
16   -#include <linux/fs.h>
17   -#include <linux/module.h>
18   -#include <linux/sched.h>
19   -#include <linux/dnotify.h>
20   -#include <linux/init.h>
21   -#include <linux/spinlock.h>
22   -#include <linux/slab.h>
23   -#include <linux/fdtable.h>
24   -
25   -int dir_notify_enable __read_mostly = 1;
26   -
27   -static struct kmem_cache *dn_cache __read_mostly;
28   -
29   -static void redo_inode_mask(struct inode *inode)
30   -{
31   - unsigned long new_mask;
32   - struct dnotify_struct *dn;
33   -
34   - new_mask = 0;
35   - for (dn = inode->i_dnotify; dn != NULL; dn = dn->dn_next)
36   - new_mask |= dn->dn_mask & ~DN_MULTISHOT;
37   - inode->i_dnotify_mask = new_mask;
38   -}
39   -
40   -void dnotify_flush(struct file *filp, fl_owner_t id)
41   -{
42   - struct dnotify_struct *dn;
43   - struct dnotify_struct **prev;
44   - struct inode *inode;
45   -
46   - inode = filp->f_path.dentry->d_inode;
47   - if (!S_ISDIR(inode->i_mode))
48   - return;
49   - spin_lock(&inode->i_lock);
50   - prev = &inode->i_dnotify;
51   - while ((dn = *prev) != NULL) {
52   - if ((dn->dn_owner == id) && (dn->dn_filp == filp)) {
53   - *prev = dn->dn_next;
54   - redo_inode_mask(inode);
55   - kmem_cache_free(dn_cache, dn);
56   - break;
57   - }
58   - prev = &dn->dn_next;
59   - }
60   - spin_unlock(&inode->i_lock);
61   -}
62   -
63   -int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
64   -{
65   - struct dnotify_struct *dn;
66   - struct dnotify_struct *odn;
67   - struct dnotify_struct **prev;
68   - struct inode *inode;
69   - fl_owner_t id = current->files;
70   - struct file *f;
71   - int error = 0;
72   -
73   - if ((arg & ~DN_MULTISHOT) == 0) {
74   - dnotify_flush(filp, id);
75   - return 0;
76   - }
77   - if (!dir_notify_enable)
78   - return -EINVAL;
79   - inode = filp->f_path.dentry->d_inode;
80   - if (!S_ISDIR(inode->i_mode))
81   - return -ENOTDIR;
82   - dn = kmem_cache_alloc(dn_cache, GFP_KERNEL);
83   - if (dn == NULL)
84   - return -ENOMEM;
85   - spin_lock(&inode->i_lock);
86   - prev = &inode->i_dnotify;
87   - while ((odn = *prev) != NULL) {
88   - if ((odn->dn_owner == id) && (odn->dn_filp == filp)) {
89   - odn->dn_fd = fd;
90   - odn->dn_mask |= arg;
91   - inode->i_dnotify_mask |= arg & ~DN_MULTISHOT;
92   - goto out_free;
93   - }
94   - prev = &odn->dn_next;
95   - }
96   -
97   - rcu_read_lock();
98   - f = fcheck(fd);
99   - rcu_read_unlock();
100   - /* we'd lost the race with close(), sod off silently */
101   - /* note that inode->i_lock prevents reordering problems
102   - * between accesses to descriptor table and ->i_dnotify */
103   - if (f != filp)
104   - goto out_free;
105   -
106   - error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
107   - if (error)
108   - goto out_free;
109   -
110   - dn->dn_mask = arg;
111   - dn->dn_fd = fd;
112   - dn->dn_filp = filp;
113   - dn->dn_owner = id;
114   - inode->i_dnotify_mask |= arg & ~DN_MULTISHOT;
115   - dn->dn_next = inode->i_dnotify;
116   - inode->i_dnotify = dn;
117   - spin_unlock(&inode->i_lock);
118   - return 0;
119   -
120   -out_free:
121   - spin_unlock(&inode->i_lock);
122   - kmem_cache_free(dn_cache, dn);
123   - return error;
124   -}
125   -
126   -void __inode_dir_notify(struct inode *inode, unsigned long event)
127   -{
128   - struct dnotify_struct * dn;
129   - struct dnotify_struct **prev;
130   - struct fown_struct * fown;
131   - int changed = 0;
132   -
133   - spin_lock(&inode->i_lock);
134   - prev = &inode->i_dnotify;
135   - while ((dn = *prev) != NULL) {
136   - if ((dn->dn_mask & event) == 0) {
137   - prev = &dn->dn_next;
138   - continue;
139   - }
140   - fown = &dn->dn_filp->f_owner;
141   - send_sigio(fown, dn->dn_fd, POLL_MSG);
142   - if (dn->dn_mask & DN_MULTISHOT)
143   - prev = &dn->dn_next;
144   - else {
145   - *prev = dn->dn_next;
146   - changed = 1;
147   - kmem_cache_free(dn_cache, dn);
148   - }
149   - }
150   - if (changed)
151   - redo_inode_mask(inode);
152   - spin_unlock(&inode->i_lock);
153   -}
154   -
155   -EXPORT_SYMBOL(__inode_dir_notify);
156   -
157   -/*
158   - * This is hopelessly wrong, but unfixable without API changes. At
159   - * least it doesn't oops the kernel...
160   - *
161   - * To safely access ->d_parent we need to keep d_move away from it. Use the
162   - * dentry's d_lock for this.
163   - */
164   -void dnotify_parent(struct dentry *dentry, unsigned long event)
165   -{
166   - struct dentry *parent;
167   -
168   - if (!dir_notify_enable)
169   - return;
170   -
171   - spin_lock(&dentry->d_lock);
172   - parent = dentry->d_parent;
173   - if (parent->d_inode->i_dnotify_mask & event) {
174   - dget(parent);
175   - spin_unlock(&dentry->d_lock);
176   - __inode_dir_notify(parent->d_inode, event);
177   - dput(parent);
178   - } else {
179   - spin_unlock(&dentry->d_lock);
180   - }
181   -}
182   -EXPORT_SYMBOL_GPL(dnotify_parent);
183   -
184   -static int __init dnotify_init(void)
185   -{
186   - dn_cache = kmem_cache_create("dnotify_cache",
187   - sizeof(struct dnotify_struct), 0, SLAB_PANIC, NULL);
188   - return 0;
189   -}
190   -
191   -module_init(dnotify_init)
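
For context, everything fs/dnotify.c implements is driven from userspace through fcntl(F_NOTIFY) on a directory file descriptor, with events delivered as signals.  A minimal consumer might look like the sketch below; the watched path, the choice of SIGRTMIN and the event mask are illustrative and not taken from this patch.

/*
 * Minimal dnotify consumer sketch: watch the current directory for
 * create/modify events via the fcntl(F_NOTIFY) interface implemented
 * by fs/dnotify.c.  Path, signal number and mask are illustrative.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static volatile sig_atomic_t event_fd = -1;

static void on_dnotify(int sig, siginfo_t *si, void *ctx)
{
        (void)sig; (void)ctx;
        event_fd = si->si_fd;           /* fd of the watched directory */
}

int main(void)
{
        struct sigaction sa = { 0 };
        int dfd;

        sa.sa_sigaction = on_dnotify;
        sa.sa_flags = SA_SIGINFO;
        sigemptyset(&sa.sa_mask);
        sigaction(SIGRTMIN, &sa, NULL);

        dfd = open(".", O_RDONLY | O_DIRECTORY);
        if (dfd < 0)
                return 1;

        /* Deliver a queued real-time signal instead of plain SIGIO,
         * then arm a multishot watch for create/modify events. */
        fcntl(dfd, F_SETSIG, SIGRTMIN);
        if (fcntl(dfd, F_NOTIFY, DN_CREATE | DN_MODIFY | DN_MULTISHOT) < 0)
                return 1;

        pause();                        /* wait for one event */
        printf("dnotify event on fd %d\n", (int)event_fd);

        close(dfd);
        return 0;
}

fcntl_dirnotify() above is the kernel half of that F_NOTIFY call; the DN_MULTISHOT bit (excluded from i_dnotify_mask because it is not an event) is what keeps a registration alive past its first event in __inode_dir_notify().
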
fs/inotify.c
1   -/*
2   - * fs/inotify.c - inode-based file event notifications
3   - *
4   - * Authors:
5   - * John McCutchan <ttb@tentacle.dhs.org>
6   - * Robert Love <rml@novell.com>
7   - *
8   - * Kernel API added by: Amy Griffis <amy.griffis@hp.com>
9   - *
10   - * Copyright (C) 2005 John McCutchan
11   - * Copyright 2006 Hewlett-Packard Development Company, L.P.
12   - *
13   - * This program is free software; you can redistribute it and/or modify it
14   - * under the terms of the GNU General Public License as published by the
15   - * Free Software Foundation; either version 2, or (at your option) any
16   - * later version.
17   - *
18   - * This program is distributed in the hope that it will be useful, but
19   - * WITHOUT ANY WARRANTY; without even the implied warranty of
20   - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21   - * General Public License for more details.
22   - */
23   -
24   -#include <linux/module.h>
25   -#include <linux/kernel.h>
26   -#include <linux/spinlock.h>
27   -#include <linux/idr.h>
28   -#include <linux/slab.h>
29   -#include <linux/fs.h>
30   -#include <linux/sched.h>
31   -#include <linux/init.h>
32   -#include <linux/list.h>
33   -#include <linux/writeback.h>
34   -#include <linux/inotify.h>
35   -
36   -static atomic_t inotify_cookie;
37   -
38   -/*
39   - * Lock ordering:
40   - *
41   - * dentry->d_lock (used to keep d_move() away from dentry->d_parent)
42   - * iprune_mutex (synchronize shrink_icache_memory())
43   - * inode_lock (protects the super_block->s_inodes list)
44   - * inode->inotify_mutex (protects inode->inotify_watches and watches->i_list)
45   - * inotify_handle->mutex (protects inotify_handle and watches->h_list)
46   - *
47   - * The inode->inotify_mutex and inotify_handle->mutex are held during execution
48   - * of a caller's event handler. Thus, the caller must not hold any locks
49   - * taken in their event handler while calling any of the published inotify
50   - * interfaces.
51   - */
52   -
53   -/*
54   - * Lifetimes of the three main data structures--inotify_handle, inode, and
55   - * inotify_watch--are managed by reference count.
56   - *
57   - * inotify_handle: Lifetime is from inotify_init() to inotify_destroy().
58   - * Additional references can bump the count via get_inotify_handle() and drop
59   - * the count via put_inotify_handle().
60   - *
61   - * inotify_watch: for inotify's purposes, lifetime is from inotify_add_watch()
62   - * to remove_watch_no_event(). Additional references can bump the count via
63   - * get_inotify_watch() and drop the count via put_inotify_watch(). The caller
64   - * is responsible for the final put after receiving IN_IGNORED, or when using
65   - * IN_ONESHOT after receiving the first event. Inotify does the final put if
66   - * inotify_destroy() is called.
67   - *
68   - * inode: Pinned so long as the inode is associated with a watch, from
69   - * inotify_add_watch() to the final put_inotify_watch().
70   - */
71   -
72   -/*
73   - * struct inotify_handle - represents an inotify instance
74   - *
75   - * This structure is protected by the mutex 'mutex'.
76   - */
77   -struct inotify_handle {
78   - struct idr idr; /* idr mapping wd -> watch */
79   - struct mutex mutex; /* protects this bad boy */
80   - struct list_head watches; /* list of watches */
81   - atomic_t count; /* reference count */
82   - u32 last_wd; /* the last wd allocated */
83   - const struct inotify_operations *in_ops; /* inotify caller operations */
84   -};
85   -
86   -static inline void get_inotify_handle(struct inotify_handle *ih)
87   -{
88   - atomic_inc(&ih->count);
89   -}
90   -
91   -static inline void put_inotify_handle(struct inotify_handle *ih)
92   -{
93   - if (atomic_dec_and_test(&ih->count)) {
94   - idr_destroy(&ih->idr);
95   - kfree(ih);
96   - }
97   -}
98   -
99   -/**
100   - * get_inotify_watch - grab a reference to an inotify_watch
101   - * @watch: watch to grab
102   - */
103   -void get_inotify_watch(struct inotify_watch *watch)
104   -{
105   - atomic_inc(&watch->count);
106   -}
107   -EXPORT_SYMBOL_GPL(get_inotify_watch);
108   -
109   -int pin_inotify_watch(struct inotify_watch *watch)
110   -{
111   - struct super_block *sb = watch->inode->i_sb;
112   - spin_lock(&sb_lock);
113   - if (sb->s_count >= S_BIAS) {
114   - atomic_inc(&sb->s_active);
115   - spin_unlock(&sb_lock);
116   - atomic_inc(&watch->count);
117   - return 1;
118   - }
119   - spin_unlock(&sb_lock);
120   - return 0;
121   -}
122   -
123   -/**
124   - * put_inotify_watch - decrements the ref count on a given watch. cleans up
125   - * watch references if the count reaches zero. inotify_watch is freed by
126   - * inotify callers via the destroy_watch() op.
127   - * @watch: watch to release
128   - */
129   -void put_inotify_watch(struct inotify_watch *watch)
130   -{
131   - if (atomic_dec_and_test(&watch->count)) {
132   - struct inotify_handle *ih = watch->ih;
133   -
134   - iput(watch->inode);
135   - ih->in_ops->destroy_watch(watch);
136   - put_inotify_handle(ih);
137   - }
138   -}
139   -EXPORT_SYMBOL_GPL(put_inotify_watch);
140   -
141   -void unpin_inotify_watch(struct inotify_watch *watch)
142   -{
143   - struct super_block *sb = watch->inode->i_sb;
144   - put_inotify_watch(watch);
145   - deactivate_super(sb);
146   -}
147   -
148   -/*
149   - * inotify_handle_get_wd - returns the next WD for use by the given handle
150   - *
151   - * Callers must hold ih->mutex. This function can sleep.
152   - */
153   -static int inotify_handle_get_wd(struct inotify_handle *ih,
154   - struct inotify_watch *watch)
155   -{
156   - int ret;
157   -
158   - do {
159   - if (unlikely(!idr_pre_get(&ih->idr, GFP_KERNEL)))
160   - return -ENOSPC;
161   - ret = idr_get_new_above(&ih->idr, watch, ih->last_wd+1, &watch->wd);
162   - } while (ret == -EAGAIN);
163   -
164   - if (likely(!ret))
165   - ih->last_wd = watch->wd;
166   -
167   - return ret;
168   -}
169   -
170   -/*
171   - * inotify_inode_watched - returns nonzero if there are watches on this inode
172   - * and zero otherwise. We call this lockless, we do not care if we race.
173   - */
174   -static inline int inotify_inode_watched(struct inode *inode)
175   -{
176   - return !list_empty(&inode->inotify_watches);
177   -}
178   -
179   -/*
180   - * Get child dentry flag in sync with parent inode.
181   - * Flag should always be clear for negative dentries.
182   - */
183   -static void set_dentry_child_flags(struct inode *inode, int watched)
184   -{
185   - struct dentry *alias;
186   -
187   - spin_lock(&dcache_lock);
188   - list_for_each_entry(alias, &inode->i_dentry, d_alias) {
189   - struct dentry *child;
190   -
191   - list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) {
192   - if (!child->d_inode)
193   - continue;
194   -
195   - spin_lock(&child->d_lock);
196   - if (watched)
197   - child->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
198   - else
199   - child->d_flags &=~DCACHE_INOTIFY_PARENT_WATCHED;
200   - spin_unlock(&child->d_lock);
201   - }
202   - }
203   - spin_unlock(&dcache_lock);
204   -}
205   -
206   -/*
207   - * inode_find_handle - find the watch associated with the given inode and
208   - * handle
209   - *
210   - * Callers must hold inode->inotify_mutex.
211   - */
212   -static struct inotify_watch *inode_find_handle(struct inode *inode,
213   - struct inotify_handle *ih)
214   -{
215   - struct inotify_watch *watch;
216   -
217   - list_for_each_entry(watch, &inode->inotify_watches, i_list) {
218   - if (watch->ih == ih)
219   - return watch;
220   - }
221   -
222   - return NULL;
223   -}
224   -
225   -/*
226   - * remove_watch_no_event - remove watch without the IN_IGNORED event.
227   - *
228   - * Callers must hold both inode->inotify_mutex and ih->mutex.
229   - */
230   -static void remove_watch_no_event(struct inotify_watch *watch,
231   - struct inotify_handle *ih)
232   -{
233   - list_del(&watch->i_list);
234   - list_del(&watch->h_list);
235   -
236   - if (!inotify_inode_watched(watch->inode))
237   - set_dentry_child_flags(watch->inode, 0);
238   -
239   - idr_remove(&ih->idr, watch->wd);
240   -}
241   -
242   -/**
243   - * inotify_remove_watch_locked - Remove a watch from both the handle and the
244   - * inode. Sends the IN_IGNORED event signifying that the inode is no longer
245   - * watched. May be invoked from a caller's event handler.
246   - * @ih: inotify handle associated with watch
247   - * @watch: watch to remove
248   - *
249   - * Callers must hold both inode->inotify_mutex and ih->mutex.
250   - */
251   -void inotify_remove_watch_locked(struct inotify_handle *ih,
252   - struct inotify_watch *watch)
253   -{
254   - remove_watch_no_event(watch, ih);
255   - ih->in_ops->handle_event(watch, watch->wd, IN_IGNORED, 0, NULL, NULL);
256   -}
257   -EXPORT_SYMBOL_GPL(inotify_remove_watch_locked);
258   -
259   -/* Kernel API for producing events */
260   -
261   -/*
262   - * inotify_d_instantiate - instantiate dcache entry for inode
263   - */
264   -void inotify_d_instantiate(struct dentry *entry, struct inode *inode)
265   -{
266   - struct dentry *parent;
267   -
268   - if (!inode)
269   - return;
270   -
271   - spin_lock(&entry->d_lock);
272   - parent = entry->d_parent;
273   - if (parent->d_inode && inotify_inode_watched(parent->d_inode))
274   - entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
275   - spin_unlock(&entry->d_lock);
276   -}
277   -
278   -/*
279   - * inotify_d_move - dcache entry has been moved
280   - */
281   -void inotify_d_move(struct dentry *entry)
282   -{
283   - struct dentry *parent;
284   -
285   - parent = entry->d_parent;
286   - if (inotify_inode_watched(parent->d_inode))
287   - entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
288   - else
289   - entry->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED;
290   -}
291   -
292   -/**
293   - * inotify_inode_queue_event - queue an event to all watches on this inode
294   - * @inode: inode event is originating from
295   - * @mask: event mask describing this event
296   - * @cookie: cookie for synchronization, or zero
297   - * @name: filename, if any
298   - * @n_inode: inode associated with name
299   - */
300   -void inotify_inode_queue_event(struct inode *inode, u32 mask, u32 cookie,
301   - const char *name, struct inode *n_inode)
302   -{
303   - struct inotify_watch *watch, *next;
304   -
305   - if (!inotify_inode_watched(inode))
306   - return;
307   -
308   - mutex_lock(&inode->inotify_mutex);
309   - list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) {
310   - u32 watch_mask = watch->mask;
311   - if (watch_mask & mask) {
312   - struct inotify_handle *ih= watch->ih;
313   - mutex_lock(&ih->mutex);
314   - if (watch_mask & IN_ONESHOT)
315   - remove_watch_no_event(watch, ih);
316   - ih->in_ops->handle_event(watch, watch->wd, mask, cookie,
317   - name, n_inode);
318   - mutex_unlock(&ih->mutex);
319   - }
320   - }
321   - mutex_unlock(&inode->inotify_mutex);
322   -}
323   -EXPORT_SYMBOL_GPL(inotify_inode_queue_event);
324   -
325   -/**
326   - * inotify_dentry_parent_queue_event - queue an event to a dentry's parent
327   - * @dentry: the dentry in question, we queue against this dentry's parent
328   - * @mask: event mask describing this event
329   - * @cookie: cookie for synchronization, or zero
330   - * @name: filename, if any
331   - */
332   -void inotify_dentry_parent_queue_event(struct dentry *dentry, u32 mask,
333   - u32 cookie, const char *name)
334   -{
335   - struct dentry *parent;
336   - struct inode *inode;
337   -
338   - if (!(dentry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED))
339   - return;
340   -
341   - spin_lock(&dentry->d_lock);
342   - parent = dentry->d_parent;
343   - inode = parent->d_inode;
344   -
345   - if (inotify_inode_watched(inode)) {
346   - dget(parent);
347   - spin_unlock(&dentry->d_lock);
348   - inotify_inode_queue_event(inode, mask, cookie, name,
349   - dentry->d_inode);
350   - dput(parent);
351   - } else
352   - spin_unlock(&dentry->d_lock);
353   -}
354   -EXPORT_SYMBOL_GPL(inotify_dentry_parent_queue_event);
355   -
356   -/**
357   - * inotify_get_cookie - return a unique cookie for use in synchronizing events.
358   - */
359   -u32 inotify_get_cookie(void)
360   -{
361   - return atomic_inc_return(&inotify_cookie);
362   -}
363   -EXPORT_SYMBOL_GPL(inotify_get_cookie);
364   -
365   -/**
366   - * inotify_unmount_inodes - an sb is unmounting. handle any watched inodes.
367   - * @list: list of inodes being unmounted (sb->s_inodes)
368   - *
369   - * Called with inode_lock held, protecting the unmounting super block's list
370   - * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay.
371   - * We temporarily drop inode_lock, however, and CAN block.
372   - */
373   -void inotify_unmount_inodes(struct list_head *list)
374   -{
375   - struct inode *inode, *next_i, *need_iput = NULL;
376   -
377   - list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
378   - struct inotify_watch *watch, *next_w;
379   - struct inode *need_iput_tmp;
380   - struct list_head *watches;
381   -
382   - /*
383   - * If i_count is zero, the inode cannot have any watches and
384   - * doing an __iget/iput with MS_ACTIVE clear would actually
385   - * evict all inodes with zero i_count from icache which is
386   - * unnecessarily violent and may in fact be illegal to do.
387   - */
388   - if (!atomic_read(&inode->i_count))
389   - continue;
390   -
391   - /*
392   - * We cannot __iget() an inode in state I_CLEAR, I_FREEING, or
393   - * I_WILL_FREE which is fine because by that point the inode
394   - * cannot have any associated watches.
395   - */
396   - if (inode->i_state & (I_CLEAR | I_FREEING | I_WILL_FREE))
397   - continue;
398   -
399   - need_iput_tmp = need_iput;
400   - need_iput = NULL;
401   - /* In case inotify_remove_watch_locked() drops a reference. */
402   - if (inode != need_iput_tmp)
403   - __iget(inode);
404   - else
405   - need_iput_tmp = NULL;
406   - /* In case the dropping of a reference would nuke next_i. */
407   - if ((&next_i->i_sb_list != list) &&
408   - atomic_read(&next_i->i_count) &&
409   - !(next_i->i_state & (I_CLEAR | I_FREEING |
410   - I_WILL_FREE))) {
411   - __iget(next_i);
412   - need_iput = next_i;
413   - }
414   -
415   - /*
416   - * We can safely drop inode_lock here because we hold
417   - * references on both inode and next_i. Also no new inodes
418   - * will be added since the umount has begun. Finally,
419   - * iprune_mutex keeps shrink_icache_memory() away.
420   - */
421   - spin_unlock(&inode_lock);
422   -
423   - if (need_iput_tmp)
424   - iput(need_iput_tmp);
425   -
426   - /* for each watch, send IN_UNMOUNT and then remove it */
427   - mutex_lock(&inode->inotify_mutex);
428   - watches = &inode->inotify_watches;
429   - list_for_each_entry_safe(watch, next_w, watches, i_list) {
430   - struct inotify_handle *ih= watch->ih;
431   - get_inotify_watch(watch);
432   - mutex_lock(&ih->mutex);
433   - ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0,
434   - NULL, NULL);
435   - inotify_remove_watch_locked(ih, watch);
436   - mutex_unlock(&ih->mutex);
437   - put_inotify_watch(watch);
438   - }
439   - mutex_unlock(&inode->inotify_mutex);
440   - iput(inode);
441   -
442   - spin_lock(&inode_lock);
443   - }
444   -}
445   -EXPORT_SYMBOL_GPL(inotify_unmount_inodes);
446   -
447   -/**
448   - * inotify_inode_is_dead - an inode has been deleted, cleanup any watches
449   - * @inode: inode that is about to be removed
450   - */
451   -void inotify_inode_is_dead(struct inode *inode)
452   -{
453   - struct inotify_watch *watch, *next;
454   -
455   - mutex_lock(&inode->inotify_mutex);
456   - list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) {
457   - struct inotify_handle *ih = watch->ih;
458   - mutex_lock(&ih->mutex);
459   - inotify_remove_watch_locked(ih, watch);
460   - mutex_unlock(&ih->mutex);
461   - }
462   - mutex_unlock(&inode->inotify_mutex);
463   -}
464   -EXPORT_SYMBOL_GPL(inotify_inode_is_dead);
465   -
466   -/* Kernel Consumer API */
467   -
468   -/**
469   - * inotify_init - allocate and initialize an inotify instance
470   - * @ops: caller's inotify operations
471   - */
472   -struct inotify_handle *inotify_init(const struct inotify_operations *ops)
473   -{
474   - struct inotify_handle *ih;
475   -
476   - ih = kmalloc(sizeof(struct inotify_handle), GFP_KERNEL);
477   - if (unlikely(!ih))
478   - return ERR_PTR(-ENOMEM);
479   -
480   - idr_init(&ih->idr);
481   - INIT_LIST_HEAD(&ih->watches);
482   - mutex_init(&ih->mutex);
483   - ih->last_wd = 0;
484   - ih->in_ops = ops;
485   - atomic_set(&ih->count, 0);
486   - get_inotify_handle(ih);
487   -
488   - return ih;
489   -}
490   -EXPORT_SYMBOL_GPL(inotify_init);
491   -
492   -/**
493   - * inotify_init_watch - initialize an inotify watch
494   - * @watch: watch to initialize
495   - */
496   -void inotify_init_watch(struct inotify_watch *watch)
497   -{
498   - INIT_LIST_HEAD(&watch->h_list);
499   - INIT_LIST_HEAD(&watch->i_list);
500   - atomic_set(&watch->count, 0);
501   - get_inotify_watch(watch); /* initial get */
502   -}
503   -EXPORT_SYMBOL_GPL(inotify_init_watch);
504   -
505   -/*
506   - * Watch removals suck violently. To kick the watch out we need (in this
507   - * order) inode->inotify_mutex and ih->mutex. That's fine if we have
508   - * a hold on inode; however, for all other cases we need to make damn sure
509   - * we don't race with umount. We can *NOT* just grab a reference to a
510   - * watch - inotify_unmount_inodes() will happily sail past it and we'll end
511   - * with reference to inode potentially outliving its superblock. Ideally
512   - * we just want to grab an active reference to superblock if we can; that
513   - * will make sure we won't go into inotify_umount_inodes() until we are
514   - * done. Cleanup is just deactivate_super(). However, that leaves a messy
515   - * case - what if we *are* racing with umount() and active references to
516   - * superblock can't be acquired anymore? We can bump ->s_count, grab
517   - * ->s_umount, which will almost certainly wait until the superblock is shut
518   - * down and the watch in question is pining for fjords. That's fine, but
519   - * there is a problem - we might have hit the window between ->s_active
520   - * getting to 0 / ->s_count - below S_BIAS (i.e. the moment when superblock
521   - * is past the point of no return and is heading for shutdown) and the
522   - * moment when deactivate_super() acquires ->s_umount. We could just do
523   - * drop_super() yield() and retry, but that's rather antisocial and this
524   - * stuff is luser-triggerable. OTOH, having grabbed ->s_umount and having
525   - * found that we'd got there first (i.e. that ->s_root is non-NULL) we know
526   - * that we won't race with inotify_umount_inodes(). So we could grab a
527   - * reference to watch and do the rest as above, just with drop_super() instead
528   - * of deactivate_super(), right? Wrong. We had to drop ih->mutex before we
529   - * could grab ->s_umount. So the watch could've been gone already.
530   - *
531   - * That still can be dealt with - we need to save watch->wd, do idr_find()
532   - * and compare its result with our pointer. If they match, we either have
533   - * the damn thing still alive or we'd lost not one but two races at once,
534   - * the watch had been killed and a new one got created with the same ->wd
535   - * at the same address. That couldn't have happened in inotify_destroy(),
536   - * but inotify_rm_wd() could run into that. Still, "new one got created"
537   - * is not a problem - we have every right to kill it or leave it alone,
538   - * whatever's more convenient.
539   - *
540   - * So we can use idr_find(...) == watch && watch->inode->i_sb == sb as
541   - * "grab it and kill it" check. If it's been our original watch, we are
542   - * fine, if it's a newcomer - nevermind, just pretend that we'd won the
543   - * race and kill the fscker anyway; we are safe since we know that its
544   - * superblock won't be going away.
545   - *
546   - * And yes, this is far beyond mere "not very pretty"; so's the entire
547   - * concept of inotify to start with.
548   - */
549   -
550   -/**
551   - * pin_to_kill - pin the watch down for removal
552   - * @ih: inotify handle
553   - * @watch: watch to kill
554   - *
555   - * Called with ih->mutex held, drops it. Possible return values:
556   - * 0 - nothing to do, it has died
557   - * 1 - remove it, drop the reference and deactivate_super()
558   - * 2 - remove it, drop the reference and drop_super(); we tried hard to avoid
559   - * that variant, since it involved a lot of PITA, but that's the best that
560   - * could've been done.
561   - */
562   -static int pin_to_kill(struct inotify_handle *ih, struct inotify_watch *watch)
563   -{
564   - struct super_block *sb = watch->inode->i_sb;
565   - s32 wd = watch->wd;
566   -
567   - spin_lock(&sb_lock);
568   - if (sb->s_count >= S_BIAS) {
569   - atomic_inc(&sb->s_active);
570   - spin_unlock(&sb_lock);
571   - get_inotify_watch(watch);
572   - mutex_unlock(&ih->mutex);
573   - return 1; /* the best outcome */
574   - }
575   - sb->s_count++;
576   - spin_unlock(&sb_lock);
577   - mutex_unlock(&ih->mutex); /* can't grab ->s_umount under it */
578   - down_read(&sb->s_umount);
579   - if (likely(!sb->s_root)) {
580   - /* fs is already shut down; the watch is dead */
581   - drop_super(sb);
582   - return 0;
583   - }
584   - /* raced with the final deactivate_super() */
585   - mutex_lock(&ih->mutex);
586   - if (idr_find(&ih->idr, wd) != watch || watch->inode->i_sb != sb) {
587   - /* the watch is dead */
588   - mutex_unlock(&ih->mutex);
589   - drop_super(sb);
590   - return 0;
591   - }
592   - /* still alive or freed and reused with the same sb and wd; kill */
593   - get_inotify_watch(watch);
594   - mutex_unlock(&ih->mutex);
595   - return 2;
596   -}
597   -
598   -static void unpin_and_kill(struct inotify_watch *watch, int how)
599   -{
600   - struct super_block *sb = watch->inode->i_sb;
601   - put_inotify_watch(watch);
602   - switch (how) {
603   - case 1:
604   - deactivate_super(sb);
605   - break;
606   - case 2:
607   - drop_super(sb);
608   - }
609   -}
610   -
611   -/**
612   - * inotify_destroy - clean up and destroy an inotify instance
613   - * @ih: inotify handle
614   - */
615   -void inotify_destroy(struct inotify_handle *ih)
616   -{
617   - /*
618   - * Destroy all of the watches for this handle. Unfortunately, not very
619   - * pretty. We cannot do a simple iteration over the list, because we
620   - * do not know the inode until we iterate to the watch. But we need to
621   - * hold inode->inotify_mutex before ih->mutex. The following works.
622   - *
623   - * AV: it had to become even uglier to start working ;-/
624   - */
625   - while (1) {
626   - struct inotify_watch *watch;
627   - struct list_head *watches;
628   - struct super_block *sb;
629   - struct inode *inode;
630   - int how;
631   -
632   - mutex_lock(&ih->mutex);
633   - watches = &ih->watches;
634   - if (list_empty(watches)) {
635   - mutex_unlock(&ih->mutex);
636   - break;
637   - }
638   - watch = list_first_entry(watches, struct inotify_watch, h_list);
639   - sb = watch->inode->i_sb;
640   - how = pin_to_kill(ih, watch);
641   - if (!how)
642   - continue;
643   -
644   - inode = watch->inode;
645   - mutex_lock(&inode->inotify_mutex);
646   - mutex_lock(&ih->mutex);
647   -
648   - /* make sure we didn't race with another list removal */
649   - if (likely(idr_find(&ih->idr, watch->wd))) {
650   - remove_watch_no_event(watch, ih);
651   - put_inotify_watch(watch);
652   - }
653   -
654   - mutex_unlock(&ih->mutex);
655   - mutex_unlock(&inode->inotify_mutex);
656   - unpin_and_kill(watch, how);
657   - }
658   -
659   - /* free this handle: the put matching the get in inotify_init() */
660   - put_inotify_handle(ih);
661   -}
662   -EXPORT_SYMBOL_GPL(inotify_destroy);
663   -
664   -/**
665   - * inotify_find_watch - find an existing watch for an (ih,inode) pair
666   - * @ih: inotify handle
667   - * @inode: inode to watch
668   - * @watchp: pointer to existing inotify_watch
669   - *
670   - * Caller must pin given inode (via nameidata).
671   - */
672   -s32 inotify_find_watch(struct inotify_handle *ih, struct inode *inode,
673   - struct inotify_watch **watchp)
674   -{
675   - struct inotify_watch *old;
676   - int ret = -ENOENT;
677   -
678   - mutex_lock(&inode->inotify_mutex);
679   - mutex_lock(&ih->mutex);
680   -
681   - old = inode_find_handle(inode, ih);
682   - if (unlikely(old)) {
683   - get_inotify_watch(old); /* caller must put watch */
684   - *watchp = old;
685   - ret = old->wd;
686   - }
687   -
688   - mutex_unlock(&ih->mutex);
689   - mutex_unlock(&inode->inotify_mutex);
690   -
691   - return ret;
692   -}
693   -EXPORT_SYMBOL_GPL(inotify_find_watch);
694   -
695   -/**
696   - * inotify_find_update_watch - find and update the mask of an existing watch
697   - * @ih: inotify handle
698   - * @inode: inode's watch to update
699   - * @mask: mask of events to watch
700   - *
701   - * Caller must pin given inode (via nameidata).
702   - */
703   -s32 inotify_find_update_watch(struct inotify_handle *ih, struct inode *inode,
704   - u32 mask)
705   -{
706   - struct inotify_watch *old;
707   - int mask_add = 0;
708   - int ret;
709   -
710   - if (mask & IN_MASK_ADD)
711   - mask_add = 1;
712   -
713   - /* don't allow invalid bits: we don't want flags set */
714   - mask &= IN_ALL_EVENTS | IN_ONESHOT;
715   - if (unlikely(!mask))
716   - return -EINVAL;
717   -
718   - mutex_lock(&inode->inotify_mutex);
719   - mutex_lock(&ih->mutex);
720   -
721   - /*
722   - * Handle the case of re-adding a watch on an (inode,ih) pair that we
723   - * are already watching. We just update the mask and return its wd.
724   - */
725   - old = inode_find_handle(inode, ih);
726   - if (unlikely(!old)) {
727   - ret = -ENOENT;
728   - goto out;
729   - }
730   -
731   - if (mask_add)
732   - old->mask |= mask;
733   - else
734   - old->mask = mask;
735   - ret = old->wd;
736   -out:
737   - mutex_unlock(&ih->mutex);
738   - mutex_unlock(&inode->inotify_mutex);
739   - return ret;
740   -}
741   -EXPORT_SYMBOL_GPL(inotify_find_update_watch);
742   -
743   -/**
744   - * inotify_add_watch - add a watch to an inotify instance
745   - * @ih: inotify handle
746   - * @watch: caller allocated watch structure
747   - * @inode: inode to watch
748   - * @mask: mask of events to watch
749   - *
750   - * Caller must pin given inode (via nameidata).
751   - * Caller must ensure it only calls inotify_add_watch() once per watch.
752   - * Calls inotify_handle_get_wd() so may sleep.
753   - */
754   -s32 inotify_add_watch(struct inotify_handle *ih, struct inotify_watch *watch,
755   - struct inode *inode, u32 mask)
756   -{
757   - int ret = 0;
758   - int newly_watched;
759   -
760   - /* don't allow invalid bits: we don't want flags set */
761   - mask &= IN_ALL_EVENTS | IN_ONESHOT;
762   - if (unlikely(!mask))
763   - return -EINVAL;
764   - watch->mask = mask;
765   -
766   - mutex_lock(&inode->inotify_mutex);
767   - mutex_lock(&ih->mutex);
768   -
769   - /* Initialize a new watch */
770   - ret = inotify_handle_get_wd(ih, watch);
771   - if (unlikely(ret))
772   - goto out;
773   - ret = watch->wd;
774   -
775   - /* save a reference to handle and bump the count to make it official */
776   - get_inotify_handle(ih);
777   - watch->ih = ih;
778   -
779   - /*
780   - * Save a reference to the inode and bump the ref count to make it
781   - * official. We hold a reference to nameidata, which makes this safe.
782   - */
783   - watch->inode = igrab(inode);
784   -
785   - /* Add the watch to the handle's and the inode's list */
786   - newly_watched = !inotify_inode_watched(inode);
787   - list_add(&watch->h_list, &ih->watches);
788   - list_add(&watch->i_list, &inode->inotify_watches);
789   - /*
790   - * Set child flags _after_ adding the watch, so there is no race
791   - * window where newly instantiated children could miss their parent's
792   - * watched flag.
793   - */
794   - if (newly_watched)
795   - set_dentry_child_flags(inode, 1);
796   -
797   -out:
798   - mutex_unlock(&ih->mutex);
799   - mutex_unlock(&inode->inotify_mutex);
800   - return ret;
801   -}
802   -EXPORT_SYMBOL_GPL(inotify_add_watch);
803   -
804   -/**
805   - * inotify_clone_watch - put the watch next to existing one
806   - * @old: already installed watch
807   - * @new: new watch
808   - *
809   - * Caller must hold the inotify_mutex of inode we are dealing with;
810   - * it is expected to remove the old watch before unlocking the inode.
811   - */
812   -s32 inotify_clone_watch(struct inotify_watch *old, struct inotify_watch *new)
813   -{
814   - struct inotify_handle *ih = old->ih;
815   - int ret = 0;
816   -
817   - new->mask = old->mask;
818   - new->ih = ih;
819   -
820   - mutex_lock(&ih->mutex);
821   -
822   - /* Initialize a new watch */
823   - ret = inotify_handle_get_wd(ih, new);
824   - if (unlikely(ret))
825   - goto out;
826   - ret = new->wd;
827   -
828   - get_inotify_handle(ih);
829   -
830   - new->inode = igrab(old->inode);
831   -
832   - list_add(&new->h_list, &ih->watches);
833   - list_add(&new->i_list, &old->inode->inotify_watches);
834   -out:
835   - mutex_unlock(&ih->mutex);
836   - return ret;
837   -}
838   -
839   -void inotify_evict_watch(struct inotify_watch *watch)
840   -{
841   - get_inotify_watch(watch);
842   - mutex_lock(&watch->ih->mutex);
843   - inotify_remove_watch_locked(watch->ih, watch);
844   - mutex_unlock(&watch->ih->mutex);
845   -}
846   -
847   -/**
848   - * inotify_rm_wd - remove a watch from an inotify instance
849   - * @ih: inotify handle
850   - * @wd: watch descriptor to remove
851   - *
852   - * Can sleep.
853   - */
854   -int inotify_rm_wd(struct inotify_handle *ih, u32 wd)
855   -{
856   - struct inotify_watch *watch;
857   - struct super_block *sb;
858   - struct inode *inode;
859   - int how;
860   -
861   - mutex_lock(&ih->mutex);
862   - watch = idr_find(&ih->idr, wd);
863   - if (unlikely(!watch)) {
864   - mutex_unlock(&ih->mutex);
865   - return -EINVAL;
866   - }
867   - sb = watch->inode->i_sb;
868   - how = pin_to_kill(ih, watch);
869   - if (!how)
870   - return 0;
871   -
872   - inode = watch->inode;
873   -
874   - mutex_lock(&inode->inotify_mutex);
875   - mutex_lock(&ih->mutex);
876   -
877   - /* make sure that we did not race */
878   - if (likely(idr_find(&ih->idr, wd) == watch))
879   - inotify_remove_watch_locked(ih, watch);
880   -
881   - mutex_unlock(&ih->mutex);
882   - mutex_unlock(&inode->inotify_mutex);
883   - unpin_and_kill(watch, how);
884   -
885   - return 0;
886   -}
887   -EXPORT_SYMBOL_GPL(inotify_rm_wd);
888   -
889   -/**
890   - * inotify_rm_watch - remove a watch from an inotify instance
891   - * @ih: inotify handle
892   - * @watch: watch to remove
893   - *
894   - * Can sleep.
895   - */
896   -int inotify_rm_watch(struct inotify_handle *ih,
897   - struct inotify_watch *watch)
898   -{
899   - return inotify_rm_wd(ih, watch->wd);
900   -}
901   -EXPORT_SYMBOL_GPL(inotify_rm_watch);
902   -
903   -/*
904   - * inotify_setup - core initialization function
905   - */
906   -static int __init inotify_setup(void)
907   -{
908   - atomic_set(&inotify_cookie, 0);
909   -
910   - return 0;
911   -}
912   -
913   -module_init(inotify_setup);
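
The EXPORT_SYMBOL_GPL()ed functions above form the in-kernel consumer API (fs/inotify_user.c below is one consumer; the audit subsystem is another).  A hedged sketch of how such a consumer wires it up follows; the example_* names, the embedded-watch layout and the IN_MODIFY mask are illustrative, while the calls and signatures are the ones exported above.

/*
 * Hedged sketch of an in-kernel inotify consumer.  The example_* names
 * and callback bodies are illustrative; the API calls and signatures are
 * the ones exported by fs/inotify.c above.
 */
#include <linux/kernel.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/inotify.h>

struct example_watch {
        struct inotify_watch wdata;     /* embedded watch, freed in destroy_watch */
        /* consumer-private state would go here */
};

static void example_handle_event(struct inotify_watch *w, u32 wd, u32 mask,
                                 u32 cookie, const char *name,
                                 struct inode *inode)
{
        /* IN_IGNORED means the watch has been removed; drop the consumer's
         * reference here (inotify_destroy() does this final put itself). */
        if (mask & IN_IGNORED)
                put_inotify_watch(w);
}

static void example_destroy_watch(struct inotify_watch *w)
{
        kfree(container_of(w, struct example_watch, wdata));
}

static const struct inotify_operations example_ops = {
        .handle_event   = example_handle_event,
        .destroy_watch  = example_destroy_watch,
};

static struct inotify_handle *example_ih;

/* Attach a watch for modify events on @inode; the caller must pin the inode. */
static s32 example_watch_inode(struct inode *inode)
{
        struct example_watch *ew;
        s32 wd;

        ew = kzalloc(sizeof(*ew), GFP_KERNEL);
        if (!ew)
                return -ENOMEM;

        inotify_init_watch(&ew->wdata);         /* takes the initial reference */
        wd = inotify_add_watch(example_ih, &ew->wdata, inode, IN_MODIFY);
        if (wd < 0)
                example_destroy_watch(&ew->wdata);      /* never registered; just free */
        return wd;
}

static int __init example_setup(void)
{
        example_ih = inotify_init(&example_ops);
        return IS_ERR(example_ih) ? PTR_ERR(example_ih) : 0;
}

Removal through inotify_rm_wd() delivers IN_IGNORED, which is why example_handle_event() does a put there; inotify_destroy() instead performs that final put itself, matching the lifetime rules documented at the top of the file.
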
fs/inotify_user.c
1   -/*
2   - * fs/inotify_user.c - inotify support for userspace
3   - *
4   - * Authors:
5   - * John McCutchan <ttb@tentacle.dhs.org>
6   - * Robert Love <rml@novell.com>
7   - *
8   - * Copyright (C) 2005 John McCutchan
9   - * Copyright 2006 Hewlett-Packard Development Company, L.P.
10   - *
11   - * This program is free software; you can redistribute it and/or modify it
12   - * under the terms of the GNU General Public License as published by the
13   - * Free Software Foundation; either version 2, or (at your option) any
14   - * later version.
15   - *
16   - * This program is distributed in the hope that it will be useful, but
17   - * WITHOUT ANY WARRANTY; without even the implied warranty of
18   - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19   - * General Public License for more details.
20   - */
21   -
22   -#include <linux/kernel.h>
23   -#include <linux/sched.h>
24   -#include <linux/slab.h>
25   -#include <linux/fs.h>
26   -#include <linux/file.h>
27   -#include <linux/mount.h>
28   -#include <linux/namei.h>
29   -#include <linux/poll.h>
30   -#include <linux/init.h>
31   -#include <linux/list.h>
32   -#include <linux/inotify.h>
33   -#include <linux/syscalls.h>
34   -#include <linux/magic.h>
35   -
36   -#include <asm/ioctls.h>
37   -
38   -static struct kmem_cache *watch_cachep __read_mostly;
39   -static struct kmem_cache *event_cachep __read_mostly;
40   -
41   -static struct vfsmount *inotify_mnt __read_mostly;
42   -
43   -/* these are configurable via /proc/sys/fs/inotify/ */
44   -static int inotify_max_user_instances __read_mostly;
45   -static int inotify_max_user_watches __read_mostly;
46   -static int inotify_max_queued_events __read_mostly;
47   -
48   -/*
49   - * Lock ordering:
50   - *
51   - * inotify_dev->up_mutex (ensures we don't re-add the same watch)
52   - * inode->inotify_mutex (protects inode's watch list)
53   - * inotify_handle->mutex (protects inotify_handle's watch list)
54   - * inotify_dev->ev_mutex (protects device's event queue)
55   - */
56   -
57   -/*
58   - * Lifetimes of the main data structures:
59   - *
60   - * inotify_device: Lifetime is managed by reference count, from
61   - * sys_inotify_init() until release. Additional references can bump the count
62   - * via get_inotify_dev() and drop the count via put_inotify_dev().
63   - *
64   - * inotify_user_watch: Lifetime is from create_watch() to the receipt of an
65   - * IN_IGNORED event from inotify, or when using IN_ONESHOT, to receipt of the
66   - * first event, or to inotify_destroy().
67   - */
68   -
69   -/*
70   - * struct inotify_device - represents an inotify instance
71   - *
72   - * This structure is protected by the mutex 'mutex'.
73   - */
74   -struct inotify_device {
75   - wait_queue_head_t wq; /* wait queue for i/o */
76   - struct mutex ev_mutex; /* protects event queue */
77   - struct mutex up_mutex; /* synchronizes watch updates */
78   - struct list_head events; /* list of queued events */
79   - struct user_struct *user; /* user who opened this dev */
80   - struct inotify_handle *ih; /* inotify handle */
81   - struct fasync_struct *fa; /* async notification */
82   - atomic_t count; /* reference count */
83   - unsigned int queue_size; /* size of the queue (bytes) */
84   - unsigned int event_count; /* number of pending events */
85   - unsigned int max_events; /* maximum number of events */
86   -};
87   -
88   -/*
89   - * struct inotify_kernel_event - An inotify event, originating from a watch and
90   - * queued for user-space. A list of these is attached to each instance of the
91   - * device. In read(), this list is walked and all events that can fit in the
92   - * buffer are returned.
93   - *
94   - * Protected by dev->ev_mutex of the device in which we are queued.
95   - */
96   -struct inotify_kernel_event {
97   - struct inotify_event event; /* the user-space event */
98   - struct list_head list; /* entry in inotify_device's list */
99   - char *name; /* filename, if any */
100   -};
101   -
102   -/*
103   - * struct inotify_user_watch - our version of an inotify_watch, we add
104   - * a reference to the associated inotify_device.
105   - */
106   -struct inotify_user_watch {
107   - struct inotify_device *dev; /* associated device */
108   - struct inotify_watch wdata; /* inotify watch data */
109   -};
110   -
111   -#ifdef CONFIG_SYSCTL
112   -
113   -#include <linux/sysctl.h>
114   -
115   -static int zero;
116   -
117   -ctl_table inotify_table[] = {
118   - {
119   - .ctl_name = INOTIFY_MAX_USER_INSTANCES,
120   - .procname = "max_user_instances",
121   - .data = &inotify_max_user_instances,
122   - .maxlen = sizeof(int),
123   - .mode = 0644,
124   - .proc_handler = &proc_dointvec_minmax,
125   - .strategy = &sysctl_intvec,
126   - .extra1 = &zero,
127   - },
128   - {
129   - .ctl_name = INOTIFY_MAX_USER_WATCHES,
130   - .procname = "max_user_watches",
131   - .data = &inotify_max_user_watches,
132   - .maxlen = sizeof(int),
133   - .mode = 0644,
134   - .proc_handler = &proc_dointvec_minmax,
135   - .strategy = &sysctl_intvec,
136   - .extra1 = &zero,
137   - },
138   - {
139   - .ctl_name = INOTIFY_MAX_QUEUED_EVENTS,
140   - .procname = "max_queued_events",
141   - .data = &inotify_max_queued_events,
142   - .maxlen = sizeof(int),
143   - .mode = 0644,
144   - .proc_handler = &proc_dointvec_minmax,
145   - .strategy = &sysctl_intvec,
146   - .extra1 = &zero
147   - },
148   - { .ctl_name = 0 }
149   -};
150   -#endif /* CONFIG_SYSCTL */
151   -
152   -static inline void get_inotify_dev(struct inotify_device *dev)
153   -{
154   - atomic_inc(&dev->count);
155   -}
156   -
157   -static inline void put_inotify_dev(struct inotify_device *dev)
158   -{
159   - if (atomic_dec_and_test(&dev->count)) {
160   - atomic_dec(&dev->user->inotify_devs);
161   - free_uid(dev->user);
162   - kfree(dev);
163   - }
164   -}
165   -
166   -/*
167   - * free_inotify_user_watch - cleans up the watch and its references
168   - */
169   -static void free_inotify_user_watch(struct inotify_watch *w)
170   -{
171   - struct inotify_user_watch *watch;
172   - struct inotify_device *dev;
173   -
174   - watch = container_of(w, struct inotify_user_watch, wdata);
175   - dev = watch->dev;
176   -
177   - atomic_dec(&dev->user->inotify_watches);
178   - put_inotify_dev(dev);
179   - kmem_cache_free(watch_cachep, watch);
180   -}
181   -
182   -/*
183   - * kernel_event - create a new kernel event with the given parameters
184   - *
185   - * This function can sleep.
186   - */
187   -static struct inotify_kernel_event * kernel_event(s32 wd, u32 mask, u32 cookie,
188   - const char *name)
189   -{
190   - struct inotify_kernel_event *kevent;
191   -
192   - kevent = kmem_cache_alloc(event_cachep, GFP_NOFS);
193   - if (unlikely(!kevent))
194   - return NULL;
195   -
196   - /* we hand this out to user-space, so zero it just in case */
197   - memset(&kevent->event, 0, sizeof(struct inotify_event));
198   -
199   - kevent->event.wd = wd;
200   - kevent->event.mask = mask;
201   - kevent->event.cookie = cookie;
202   -
203   - INIT_LIST_HEAD(&kevent->list);
204   -
205   - if (name) {
206   - size_t len, rem, event_size = sizeof(struct inotify_event);
207   -
208   - /*
209   - * We need to pad the filename so as to properly align an
210   - * array of inotify_event structures. Because the structure is
211   - * small and the common case is a small filename, we just round
212   - * up to the next multiple of the structure's sizeof. This is
213   - * simple and safe for all architectures.
214   - */
215   - len = strlen(name) + 1;
216   - rem = event_size - len;
217   - if (len > event_size) {
218   - rem = event_size - (len % event_size);
219   - if (len % event_size == 0)
220   - rem = 0;
221   - }
222   -
223   - kevent->name = kmalloc(len + rem, GFP_KERNEL);
224   - if (unlikely(!kevent->name)) {
225   - kmem_cache_free(event_cachep, kevent);
226   - return NULL;
227   - }
228   - memcpy(kevent->name, name, len);
229   - if (rem)
230   - memset(kevent->name + len, 0, rem);
231   - kevent->event.len = len + rem;
232   - } else {
233   - kevent->event.len = 0;
234   - kevent->name = NULL;
235   - }
236   -
237   - return kevent;
238   -}
239   -
240   -/*
241   - * inotify_dev_get_event - return the next event in the given dev's queue
242   - *
243   - * Caller must hold dev->ev_mutex.
244   - */
245   -static inline struct inotify_kernel_event *
246   -inotify_dev_get_event(struct inotify_device *dev)
247   -{
248   - return list_entry(dev->events.next, struct inotify_kernel_event, list);
249   -}
250   -
251   -/*
252   - * inotify_dev_get_last_event - return the last event in the given dev's queue
253   - *
254   - * Caller must hold dev->ev_mutex.
255   - */
256   -static inline struct inotify_kernel_event *
257   -inotify_dev_get_last_event(struct inotify_device *dev)
258   -{
259   - if (list_empty(&dev->events))
260   - return NULL;
261   - return list_entry(dev->events.prev, struct inotify_kernel_event, list);
262   -}
263   -
264   -/*
265   - * inotify_dev_queue_event - event handler registered with core inotify, adds
266   - * a new event to the given device
267   - *
268   - * Can sleep (calls kernel_event()).
269   - */
270   -static void inotify_dev_queue_event(struct inotify_watch *w, u32 wd, u32 mask,
271   - u32 cookie, const char *name,
272   - struct inode *ignored)
273   -{
274   - struct inotify_user_watch *watch;
275   - struct inotify_device *dev;
276   - struct inotify_kernel_event *kevent, *last;
277   -
278   - watch = container_of(w, struct inotify_user_watch, wdata);
279   - dev = watch->dev;
280   -
281   - mutex_lock(&dev->ev_mutex);
282   -
283   - /* we can safely put the watch as we don't reference it while
284   - * generating the event
285   - */
286   - if (mask & IN_IGNORED || w->mask & IN_ONESHOT)
287   - put_inotify_watch(w); /* final put */
288   -
289   - /* coalescing: drop this event if it is a dupe of the previous */
290   - last = inotify_dev_get_last_event(dev);
291   - if (last && last->event.mask == mask && last->event.wd == wd &&
292   - last->event.cookie == cookie) {
293   - const char *lastname = last->name;
294   -
295   - if (!name && !lastname)
296   - goto out;
297   - if (name && lastname && !strcmp(lastname, name))
298   - goto out;
299   - }
300   -
301   - /* the queue overflowed and we already sent the Q_OVERFLOW event */
302   - if (unlikely(dev->event_count > dev->max_events))
303   - goto out;
304   -
305   - /* if the queue overflows, we need to notify user space */
306   - if (unlikely(dev->event_count == dev->max_events))
307   - kevent = kernel_event(-1, IN_Q_OVERFLOW, cookie, NULL);
308   - else
309   - kevent = kernel_event(wd, mask, cookie, name);
310   -
311   - if (unlikely(!kevent))
312   - goto out;
313   -
314   - /* queue the event and wake up anyone waiting */
315   - dev->event_count++;
316   - dev->queue_size += sizeof(struct inotify_event) + kevent->event.len;
317   - list_add_tail(&kevent->list, &dev->events);
318   - wake_up_interruptible(&dev->wq);
319   - kill_fasync(&dev->fa, SIGIO, POLL_IN);
320   -
321   -out:
322   - mutex_unlock(&dev->ev_mutex);
323   -}
324   -
325   -/*
326   - * remove_kevent - cleans up the given kevent
327   - *
328   - * Caller must hold dev->ev_mutex.
329   - */
330   -static void remove_kevent(struct inotify_device *dev,
331   - struct inotify_kernel_event *kevent)
332   -{
333   - list_del(&kevent->list);
334   -
335   - dev->event_count--;
336   - dev->queue_size -= sizeof(struct inotify_event) + kevent->event.len;
337   -}
338   -
339   -/*
340   - * free_kevent - frees the given kevent.
341   - */
342   -static void free_kevent(struct inotify_kernel_event *kevent)
343   -{
344   - kfree(kevent->name);
345   - kmem_cache_free(event_cachep, kevent);
346   -}
347   -
348   -/*
349   - * inotify_dev_event_dequeue - destroy an event on the given device
350   - *
351   - * Caller must hold dev->ev_mutex.
352   - */
353   -static void inotify_dev_event_dequeue(struct inotify_device *dev)
354   -{
355   - if (!list_empty(&dev->events)) {
356   - struct inotify_kernel_event *kevent;
357   - kevent = inotify_dev_get_event(dev);
358   - remove_kevent(dev, kevent);
359   - free_kevent(kevent);
360   - }
361   -}
362   -
363   -/*
364   - * find_inode - resolve a user-given path to a specific inode
365   - */
366   -static int find_inode(const char __user *dirname, struct path *path,
367   - unsigned flags)
368   -{
369   - int error;
370   -
371   - error = user_path_at(AT_FDCWD, dirname, flags, path);
372   - if (error)
373   - return error;
374   - /* you can only watch an inode if you have read permissions on it */
375   - error = inode_permission(path->dentry->d_inode, MAY_READ);
376   - if (error)
377   - path_put(path);
378   - return error;
379   -}
380   -
381   -/*
382   - * create_watch - creates a watch on the given device.
383   - *
384   - * Callers must hold dev->up_mutex.
385   - */
386   -static int create_watch(struct inotify_device *dev, struct inode *inode,
387   - u32 mask)
388   -{
389   - struct inotify_user_watch *watch;
390   - int ret;
391   -
392   - if (atomic_read(&dev->user->inotify_watches) >=
393   - inotify_max_user_watches)
394   - return -ENOSPC;
395   -
396   - watch = kmem_cache_alloc(watch_cachep, GFP_KERNEL);
397   - if (unlikely(!watch))
398   - return -ENOMEM;
399   -
400   - /* save a reference to device and bump the count to make it official */
401   - get_inotify_dev(dev);
402   - watch->dev = dev;
403   -
404   - atomic_inc(&dev->user->inotify_watches);
405   -
406   - inotify_init_watch(&watch->wdata);
407   - ret = inotify_add_watch(dev->ih, &watch->wdata, inode, mask);
408   - if (ret < 0)
409   - free_inotify_user_watch(&watch->wdata);
410   -
411   - return ret;
412   -}
413   -
414   -/* Device Interface */
415   -
416   -static unsigned int inotify_poll(struct file *file, poll_table *wait)
417   -{
418   - struct inotify_device *dev = file->private_data;
419   - int ret = 0;
420   -
421   - poll_wait(file, &dev->wq, wait);
422   - mutex_lock(&dev->ev_mutex);
423   - if (!list_empty(&dev->events))
424   - ret = POLLIN | POLLRDNORM;
425   - mutex_unlock(&dev->ev_mutex);
426   -
427   - return ret;
428   -}
429   -
430   -static ssize_t inotify_read(struct file *file, char __user *buf,
431   - size_t count, loff_t *pos)
432   -{
433   - size_t event_size = sizeof (struct inotify_event);
434   - struct inotify_device *dev;
435   - char __user *start;
436   - int ret;
437   - DEFINE_WAIT(wait);
438   -
439   - start = buf;
440   - dev = file->private_data;
441   -
442   - while (1) {
443   -
444   - prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE);
445   -
446   - mutex_lock(&dev->ev_mutex);
447   - if (!list_empty(&dev->events)) {
448   - ret = 0;
449   - break;
450   - }
451   - mutex_unlock(&dev->ev_mutex);
452   -
453   - if (file->f_flags & O_NONBLOCK) {
454   - ret = -EAGAIN;
455   - break;
456   - }
457   -
458   - if (signal_pending(current)) {
459   - ret = -EINTR;
460   - break;
461   - }
462   -
463   - schedule();
464   - }
465   -
466   - finish_wait(&dev->wq, &wait);
467   - if (ret)
468   - return ret;
469   -
470   - while (1) {
471   - struct inotify_kernel_event *kevent;
472   -
473   - ret = buf - start;
474   - if (list_empty(&dev->events))
475   - break;
476   -
477   - kevent = inotify_dev_get_event(dev);
478   - if (event_size + kevent->event.len > count) {
479   - if (ret == 0 && count > 0) {
480   - /*
481   - * could not get a single event because we
482   - * didn't have enough buffer space.
483   - */
484   - ret = -EINVAL;
485   - }
486   - break;
487   - }
488   - remove_kevent(dev, kevent);
489   -
490   - /*
491   - * Must perform the copy_to_user outside the mutex in order
492   - * to avoid a lock order reversal with mmap_sem.
493   - */
494   - mutex_unlock(&dev->ev_mutex);
495   -
496   - if (copy_to_user(buf, &kevent->event, event_size)) {
497   - ret = -EFAULT;
498   - break;
499   - }
500   - buf += event_size;
501   - count -= event_size;
502   -
503   - if (kevent->name) {
504   - if (copy_to_user(buf, kevent->name, kevent->event.len)){
505   - ret = -EFAULT;
506   - break;
507   - }
508   - buf += kevent->event.len;
509   - count -= kevent->event.len;
510   - }
511   -
512   - free_kevent(kevent);
513   -
514   - mutex_lock(&dev->ev_mutex);
515   - }
516   - mutex_unlock(&dev->ev_mutex);
517   -
518   - return ret;
519   -}
520   -
521   -static int inotify_fasync(int fd, struct file *file, int on)
522   -{
523   - struct inotify_device *dev = file->private_data;
524   -
525   - return fasync_helper(fd, file, on, &dev->fa) >= 0 ? 0 : -EIO;
526   -}
527   -
528   -static int inotify_release(struct inode *ignored, struct file *file)
529   -{
530   - struct inotify_device *dev = file->private_data;
531   -
532   - inotify_destroy(dev->ih);
533   -
534   - /* destroy all of the events on this device */
535   - mutex_lock(&dev->ev_mutex);
536   - while (!list_empty(&dev->events))
537   - inotify_dev_event_dequeue(dev);
538   - mutex_unlock(&dev->ev_mutex);
539   -
540   - /* free this device: the put matching the get in inotify_init() */
541   - put_inotify_dev(dev);
542   -
543   - return 0;
544   -}
545   -
546   -static long inotify_ioctl(struct file *file, unsigned int cmd,
547   - unsigned long arg)
548   -{
549   - struct inotify_device *dev;
550   - void __user *p;
551   - int ret = -ENOTTY;
552   -
553   - dev = file->private_data;
554   - p = (void __user *) arg;
555   -
556   - switch (cmd) {
557   - case FIONREAD:
558   - ret = put_user(dev->queue_size, (int __user *) p);
559   - break;
560   - }
561   -
562   - return ret;
563   -}
564   -
565   -static const struct file_operations inotify_fops = {
566   - .poll = inotify_poll,
567   - .read = inotify_read,
568   - .fasync = inotify_fasync,
569   - .release = inotify_release,
570   - .unlocked_ioctl = inotify_ioctl,
571   - .compat_ioctl = inotify_ioctl,
572   -};
573   -
574   -static const struct inotify_operations inotify_user_ops = {
575   - .handle_event = inotify_dev_queue_event,
576   - .destroy_watch = free_inotify_user_watch,
577   -};
578   -
579   -asmlinkage long sys_inotify_init1(int flags)
580   -{
581   - struct inotify_device *dev;
582   - struct inotify_handle *ih;
583   - struct user_struct *user;
584   - struct file *filp;
585   - int fd, ret;
586   -
587   - /* Check the IN_* constants for consistency. */
588   - BUILD_BUG_ON(IN_CLOEXEC != O_CLOEXEC);
589   - BUILD_BUG_ON(IN_NONBLOCK != O_NONBLOCK);
590   -
591   - if (flags & ~(IN_CLOEXEC | IN_NONBLOCK))
592   - return -EINVAL;
593   -
594   - fd = get_unused_fd_flags(flags & O_CLOEXEC);
595   - if (fd < 0)
596   - return fd;
597   -
598   - filp = get_empty_filp();
599   - if (!filp) {
600   - ret = -ENFILE;
601   - goto out_put_fd;
602   - }
603   -
604   - user = get_current_user();
605   - if (unlikely(atomic_read(&user->inotify_devs) >=
606   - inotify_max_user_instances)) {
607   - ret = -EMFILE;
608   - goto out_free_uid;
609   - }
610   -
611   - dev = kmalloc(sizeof(struct inotify_device), GFP_KERNEL);
612   - if (unlikely(!dev)) {
613   - ret = -ENOMEM;
614   - goto out_free_uid;
615   - }
616   -
617   - ih = inotify_init(&inotify_user_ops);
618   - if (IS_ERR(ih)) {
619   - ret = PTR_ERR(ih);
620   - goto out_free_dev;
621   - }
622   - dev->ih = ih;
623   - dev->fa = NULL;
624   -
625   - filp->f_op = &inotify_fops;
626   - filp->f_path.mnt = mntget(inotify_mnt);
627   - filp->f_path.dentry = dget(inotify_mnt->mnt_root);
628   - filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping;
629   - filp->f_mode = FMODE_READ;
630   - filp->f_flags = O_RDONLY | (flags & O_NONBLOCK);
631   - filp->private_data = dev;
632   -
633   - INIT_LIST_HEAD(&dev->events);
634   - init_waitqueue_head(&dev->wq);
635   - mutex_init(&dev->ev_mutex);
636   - mutex_init(&dev->up_mutex);
637   - dev->event_count = 0;
638   - dev->queue_size = 0;
639   - dev->max_events = inotify_max_queued_events;
640   - dev->user = user;
641   - atomic_set(&dev->count, 0);
642   -
643   - get_inotify_dev(dev);
644   - atomic_inc(&user->inotify_devs);
645   - fd_install(fd, filp);
646   -
647   - return fd;
648   -out_free_dev:
649   - kfree(dev);
650   -out_free_uid:
651   - free_uid(user);
652   - put_filp(filp);
653   -out_put_fd:
654   - put_unused_fd(fd);
655   - return ret;
656   -}
657   -
658   -asmlinkage long sys_inotify_init(void)
659   -{
660   - return sys_inotify_init1(0);
661   -}
662   -
663   -asmlinkage long sys_inotify_add_watch(int fd, const char __user *pathname, u32 mask)
664   -{
665   - struct inode *inode;
666   - struct inotify_device *dev;
667   - struct path path;
668   - struct file *filp;
669   - int ret, fput_needed;
670   - unsigned flags = 0;
671   -
672   - filp = fget_light(fd, &fput_needed);
673   - if (unlikely(!filp))
674   - return -EBADF;
675   -
676   - /* verify that this is indeed an inotify instance */
677   - if (unlikely(filp->f_op != &inotify_fops)) {
678   - ret = -EINVAL;
679   - goto fput_and_out;
680   - }
681   -
682   - if (!(mask & IN_DONT_FOLLOW))
683   - flags |= LOOKUP_FOLLOW;
684   - if (mask & IN_ONLYDIR)
685   - flags |= LOOKUP_DIRECTORY;
686   -
687   - ret = find_inode(pathname, &path, flags);
688   - if (unlikely(ret))
689   - goto fput_and_out;
690   -
691   - /* inode held in place by reference to path; dev by fget on fd */
692   - inode = path.dentry->d_inode;
693   - dev = filp->private_data;
694   -
695   - mutex_lock(&dev->up_mutex);
696   - ret = inotify_find_update_watch(dev->ih, inode, mask);
697   - if (ret == -ENOENT)
698   - ret = create_watch(dev, inode, mask);
699   - mutex_unlock(&dev->up_mutex);
700   -
701   - path_put(&path);
702   -fput_and_out:
703   - fput_light(filp, fput_needed);
704   - return ret;
705   -}
706   -
707   -asmlinkage long sys_inotify_rm_watch(int fd, u32 wd)
708   -{
709   - struct file *filp;
710   - struct inotify_device *dev;
711   - int ret, fput_needed;
712   -
713   - filp = fget_light(fd, &fput_needed);
714   - if (unlikely(!filp))
715   - return -EBADF;
716   -
717   - /* verify that this is indeed an inotify instance */
718   - if (unlikely(filp->f_op != &inotify_fops)) {
719   - ret = -EINVAL;
720   - goto out;
721   - }
722   -
723   - dev = filp->private_data;
724   -
725   - /* we free our watch data when we get IN_IGNORED */
726   - ret = inotify_rm_wd(dev->ih, wd);
727   -
728   -out:
729   - fput_light(filp, fput_needed);
730   - return ret;
731   -}
732   -
733   -static int
734   -inotify_get_sb(struct file_system_type *fs_type, int flags,
735   - const char *dev_name, void *data, struct vfsmount *mnt)
736   -{
737   - return get_sb_pseudo(fs_type, "inotify", NULL,
738   - INOTIFYFS_SUPER_MAGIC, mnt);
739   -}
740   -
741   -static struct file_system_type inotify_fs_type = {
742   - .name = "inotifyfs",
743   - .get_sb = inotify_get_sb,
744   - .kill_sb = kill_anon_super,
745   -};
746   -
747   -/*
748   - * inotify_user_setup - Our initialization function. Note that we cannot return
749   - * error because we have compiled-in VFS hooks. So an (unlikely) failure here
750   - * must result in panic().
751   - */
752   -static int __init inotify_user_setup(void)
753   -{
754   - int ret;
755   -
756   - ret = register_filesystem(&inotify_fs_type);
757   - if (unlikely(ret))
758   - panic("inotify: register_filesystem returned %d!\n", ret);
759   -
760   - inotify_mnt = kern_mount(&inotify_fs_type);
761   - if (IS_ERR(inotify_mnt))
762   - panic("inotify: kern_mount ret %ld!\n", PTR_ERR(inotify_mnt));
763   -
764   - inotify_max_queued_events = 16384;
765   - inotify_max_user_instances = 128;
766   - inotify_max_user_watches = 8192;
767   -
768   - watch_cachep = kmem_cache_create("inotify_watch_cache",
769   - sizeof(struct inotify_user_watch),
770   - 0, SLAB_PANIC, NULL);
771   - event_cachep = kmem_cache_create("inotify_event_cache",
772   - sizeof(struct inotify_kernel_event),
773   - 0, SLAB_PANIC, NULL);
774   -
775   - return 0;
776   -}
777   -
778   -module_init(inotify_user_setup);
  1 +source "fs/notify/dnotify/Kconfig"
  2 +source "fs/notify/inotify/Kconfig"
fs/notify/Makefile
  1 +obj-y += dnotify/
  2 +obj-y += inotify/
fs/notify/dnotify/Kconfig
  1 +config DNOTIFY
  2 + bool "Dnotify support"
  3 + default y
  4 + help
  5 + Dnotify is a directory-based per-fd file change notification system
  6 + that uses signals to communicate events to user-space. There exist
  7 + superior alternatives, but some applications may still rely on
  8 + dnotify.
  9 +
  10 + If unsure, say Y.
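For orientation, here is a minimal user-space sketch of the per-fd, signal-driven interface this option enables. It uses the standard fcntl() F_NOTIFY/F_SETSIG API; error handling is omitted, so treat it as an illustration rather than a reference implementation:

/* dnotify usage sketch (illustration only, not part of this patch) */
#define _GNU_SOURCE
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static volatile sig_atomic_t hit_fd = -1;

static void on_notify(int sig, siginfo_t *si, void *ctx)
{
	hit_fd = si->si_fd;		/* fd of the directory that changed */
}

int main(void)
{
	struct sigaction sa;
	int dirfd;

	sa.sa_sigaction = on_notify;
	sa.sa_flags = SA_SIGINFO;
	sigemptyset(&sa.sa_mask);
	sigaction(SIGRTMIN, &sa, NULL);

	dirfd = open(".", O_RDONLY);
	/* deliver SIGRTMIN with si_fd filled in, rather than plain SIGIO */
	fcntl(dirfd, F_SETSIG, SIGRTMIN);
	/* watch for creation and modification until the fd is closed */
	fcntl(dirfd, F_NOTIFY, DN_CREATE | DN_MODIFY | DN_MULTISHOT);

	for (;;) {
		pause();
		if (hit_fd != -1)
			printf("directory watched via fd %d changed\n",
			       (int)hit_fd);
		hit_fd = -1;
	}
}

Note that the signal only says which directory fd changed, not which entry changed; the application has to rescan the directory itself, which is one of the shortcomings the inotify help text below alludes to.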
fs/notify/dnotify/Makefile
  1 +obj-$(CONFIG_DNOTIFY) += dnotify.o
fs/notify/dnotify/dnotify.c
  1 +/*
  2 + * Directory notifications for Linux.
  3 + *
  4 + * Copyright (C) 2000,2001,2002 Stephen Rothwell
  5 + *
  6 + * This program is free software; you can redistribute it and/or modify it
  7 + * under the terms of the GNU General Public License as published by the
  8 + * Free Software Foundation; either version 2, or (at your option) any
  9 + * later version.
  10 + *
  11 + * This program is distributed in the hope that it will be useful, but
  12 + * WITHOUT ANY WARRANTY; without even the implied warranty of
  13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14 + * General Public License for more details.
  15 + */
  16 +#include <linux/fs.h>
  17 +#include <linux/module.h>
  18 +#include <linux/sched.h>
  19 +#include <linux/dnotify.h>
  20 +#include <linux/init.h>
  21 +#include <linux/spinlock.h>
  22 +#include <linux/slab.h>
  23 +#include <linux/fdtable.h>
  24 +
  25 +int dir_notify_enable __read_mostly = 1;
  26 +
  27 +static struct kmem_cache *dn_cache __read_mostly;
  28 +
  29 +static void redo_inode_mask(struct inode *inode)
  30 +{
  31 + unsigned long new_mask;
  32 + struct dnotify_struct *dn;
  33 +
  34 + new_mask = 0;
  35 + for (dn = inode->i_dnotify; dn != NULL; dn = dn->dn_next)
  36 + new_mask |= dn->dn_mask & ~DN_MULTISHOT;
  37 + inode->i_dnotify_mask = new_mask;
  38 +}
  39 +
  40 +void dnotify_flush(struct file *filp, fl_owner_t id)
  41 +{
  42 + struct dnotify_struct *dn;
  43 + struct dnotify_struct **prev;
  44 + struct inode *inode;
  45 +
  46 + inode = filp->f_path.dentry->d_inode;
  47 + if (!S_ISDIR(inode->i_mode))
  48 + return;
  49 + spin_lock(&inode->i_lock);
  50 + prev = &inode->i_dnotify;
  51 + while ((dn = *prev) != NULL) {
  52 + if ((dn->dn_owner == id) && (dn->dn_filp == filp)) {
  53 + *prev = dn->dn_next;
  54 + redo_inode_mask(inode);
  55 + kmem_cache_free(dn_cache, dn);
  56 + break;
  57 + }
  58 + prev = &dn->dn_next;
  59 + }
  60 + spin_unlock(&inode->i_lock);
  61 +}
  62 +
  63 +int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
  64 +{
  65 + struct dnotify_struct *dn;
  66 + struct dnotify_struct *odn;
  67 + struct dnotify_struct **prev;
  68 + struct inode *inode;
  69 + fl_owner_t id = current->files;
  70 + struct file *f;
  71 + int error = 0;
  72 +
  73 + if ((arg & ~DN_MULTISHOT) == 0) {
  74 + dnotify_flush(filp, id);
  75 + return 0;
  76 + }
  77 + if (!dir_notify_enable)
  78 + return -EINVAL;
  79 + inode = filp->f_path.dentry->d_inode;
  80 + if (!S_ISDIR(inode->i_mode))
  81 + return -ENOTDIR;
  82 + dn = kmem_cache_alloc(dn_cache, GFP_KERNEL);
  83 + if (dn == NULL)
  84 + return -ENOMEM;
  85 + spin_lock(&inode->i_lock);
  86 + prev = &inode->i_dnotify;
  87 + while ((odn = *prev) != NULL) {
  88 + if ((odn->dn_owner == id) && (odn->dn_filp == filp)) {
  89 + odn->dn_fd = fd;
  90 + odn->dn_mask |= arg;
  91 + inode->i_dnotify_mask |= arg & ~DN_MULTISHOT;
  92 + goto out_free;
  93 + }
  94 + prev = &odn->dn_next;
  95 + }
  96 +
  97 + rcu_read_lock();
  98 + f = fcheck(fd);
  99 + rcu_read_unlock();
  100 + /* we'd lost the race with close(), sod off silently */
  101 + /* note that inode->i_lock prevents reordering problems
  102 + * between accesses to descriptor table and ->i_dnotify */
  103 + if (f != filp)
  104 + goto out_free;
  105 +
  106 + error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
  107 + if (error)
  108 + goto out_free;
  109 +
  110 + dn->dn_mask = arg;
  111 + dn->dn_fd = fd;
  112 + dn->dn_filp = filp;
  113 + dn->dn_owner = id;
  114 + inode->i_dnotify_mask |= arg & ~DN_MULTISHOT;
  115 + dn->dn_next = inode->i_dnotify;
  116 + inode->i_dnotify = dn;
  117 + spin_unlock(&inode->i_lock);
  118 + return 0;
  119 +
  120 +out_free:
  121 + spin_unlock(&inode->i_lock);
  122 + kmem_cache_free(dn_cache, dn);
  123 + return error;
  124 +}
  125 +
  126 +void __inode_dir_notify(struct inode *inode, unsigned long event)
  127 +{
  128 + struct dnotify_struct * dn;
  129 + struct dnotify_struct **prev;
  130 + struct fown_struct * fown;
  131 + int changed = 0;
  132 +
  133 + spin_lock(&inode->i_lock);
  134 + prev = &inode->i_dnotify;
  135 + while ((dn = *prev) != NULL) {
  136 + if ((dn->dn_mask & event) == 0) {
  137 + prev = &dn->dn_next;
  138 + continue;
  139 + }
  140 + fown = &dn->dn_filp->f_owner;
  141 + send_sigio(fown, dn->dn_fd, POLL_MSG);
  142 + if (dn->dn_mask & DN_MULTISHOT)
  143 + prev = &dn->dn_next;
  144 + else {
  145 + *prev = dn->dn_next;
  146 + changed = 1;
  147 + kmem_cache_free(dn_cache, dn);
  148 + }
  149 + }
  150 + if (changed)
  151 + redo_inode_mask(inode);
  152 + spin_unlock(&inode->i_lock);
  153 +}
  154 +
  155 +EXPORT_SYMBOL(__inode_dir_notify);
  156 +
  157 +/*
  158 + * This is hopelessly wrong, but unfixable without API changes. At
  159 + * least it doesn't oops the kernel...
  160 + *
  161 + * To safely access ->d_parent we need to keep d_move away from it. Use the
  162 + * dentry's d_lock for this.
  163 + */
  164 +void dnotify_parent(struct dentry *dentry, unsigned long event)
  165 +{
  166 + struct dentry *parent;
  167 +
  168 + if (!dir_notify_enable)
  169 + return;
  170 +
  171 + spin_lock(&dentry->d_lock);
  172 + parent = dentry->d_parent;
  173 + if (parent->d_inode->i_dnotify_mask & event) {
  174 + dget(parent);
  175 + spin_unlock(&dentry->d_lock);
  176 + __inode_dir_notify(parent->d_inode, event);
  177 + dput(parent);
  178 + } else {
  179 + spin_unlock(&dentry->d_lock);
  180 + }
  181 +}
  182 +EXPORT_SYMBOL_GPL(dnotify_parent);
  183 +
  184 +static int __init dnotify_init(void)
  185 +{
  186 + dn_cache = kmem_cache_create("dnotify_cache",
  187 + sizeof(struct dnotify_struct), 0, SLAB_PANIC, NULL);
  188 + return 0;
  189 +}
  190 +
  191 +module_init(dnotify_init)
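For context, filesystems do not call the functions above directly; the VFS reaches them through small inline helpers in include/linux/fsnotify.h, which this patch leaves untouched. Roughly (quoted from memory as a sketch, so details may differ), a file modification fans out to both dnotify and inotify like this, using dnotify_parent() from the file above and the inotify entry points defined in fs/notify/inotify/inotify.c below:

/* approximate shape of the fsnotify.h helper (sketch, not added here) */
static inline void fsnotify_modify(struct dentry *dentry)
{
	struct inode *inode = dentry->d_inode;
	u32 mask = IN_MODIFY;

	if (S_ISDIR(inode->i_mode))
		mask |= IN_ISDIR;		/* flag directory events */

	dnotify_parent(dentry, DN_MODIFY);
	inotify_dentry_parent_queue_event(dentry, mask, 0,
					  dentry->d_name.name);
	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
}

The d_name.name passed here is only a hint; inotify copies the name into its own event (see kernel_event() in inotify_user.c further down).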
fs/notify/inotify/Kconfig
  1 +config INOTIFY
  2 + bool "Inotify file change notification support"
  3 + default y
  4 + ---help---
  5 + Say Y here to enable inotify support. Inotify is a file change
  6 + notification system and a replacement for dnotify. Inotify fixes
  7 + numerous shortcomings in dnotify and introduces several new features
  8 + including multiple file events, one-shot support, and unmount
  9 + notification.
  10 +
  11 + For more information, see <file:Documentation/filesystems/inotify.txt>
  12 +
  13 + If unsure, say Y.
  14 +
  15 +config INOTIFY_USER
  16 + bool "Inotify support for userspace"
  17 + depends on INOTIFY
  18 + default y
  19 + ---help---
  20 + Say Y here to enable inotify support for userspace, including the
  21 + associated system calls. Inotify allows monitoring of both files and
  22 + directories via a single open fd. Events are read from the file
  23 + descriptor, which is also select()- and poll()-able.
  24 +
  25 + For more information, see <file:Documentation/filesystems/inotify.txt>
  26 +
  27 + If unsure, say Y.
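A minimal user-space counterpart to the description above, using the <sys/inotify.h> wrappers for the syscalls implemented in inotify_user.c (error handling omitted; illustration only):

/* inotify usage sketch (illustration only, not part of this patch) */
#include <sys/inotify.h>
#include <limits.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* big enough for at least one event with a maximal file name */
	char buf[sizeof(struct inotify_event) + NAME_MAX + 1]
		__attribute__((aligned(__alignof__(struct inotify_event))));
	ssize_t len, i;
	int fd, wd;

	fd = inotify_init();			/* one instance == one fd */
	wd = inotify_add_watch(fd, ".", IN_CREATE | IN_MODIFY | IN_DELETE);

	/* the fd is also select()- and poll()-able; here we just block */
	while ((len = read(fd, buf, sizeof(buf))) > 0) {
		for (i = 0; i < len; ) {
			struct inotify_event *ev =
				(struct inotify_event *)(buf + i);

			printf("wd=%d mask=0x%x name=%s\n",
			       ev->wd, ev->mask, ev->len ? ev->name : "");
			i += sizeof(*ev) + ev->len;
		}
	}

	inotify_rm_watch(fd, wd);
	close(fd);
	return 0;
}

Several distinct watches multiplex over the single descriptor, which is what the wd carried in each event is for.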
fs/notify/inotify/Makefile
  1 +obj-$(CONFIG_INOTIFY) += inotify.o
  2 +obj-$(CONFIG_INOTIFY_USER) += inotify_user.o
fs/notify/inotify/inotify.c
  1 +/*
  2 + * fs/inotify.c - inode-based file event notifications
  3 + *
  4 + * Authors:
  5 + * John McCutchan <ttb@tentacle.dhs.org>
  6 + * Robert Love <rml@novell.com>
  7 + *
  8 + * Kernel API added by: Amy Griffis <amy.griffis@hp.com>
  9 + *
  10 + * Copyright (C) 2005 John McCutchan
  11 + * Copyright 2006 Hewlett-Packard Development Company, L.P.
  12 + *
  13 + * This program is free software; you can redistribute it and/or modify it
  14 + * under the terms of the GNU General Public License as published by the
  15 + * Free Software Foundation; either version 2, or (at your option) any
  16 + * later version.
  17 + *
  18 + * This program is distributed in the hope that it will be useful, but
  19 + * WITHOUT ANY WARRANTY; without even the implied warranty of
  20 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  21 + * General Public License for more details.
  22 + */
  23 +
  24 +#include <linux/module.h>
  25 +#include <linux/kernel.h>
  26 +#include <linux/spinlock.h>
  27 +#include <linux/idr.h>
  28 +#include <linux/slab.h>
  29 +#include <linux/fs.h>
  30 +#include <linux/sched.h>
  31 +#include <linux/init.h>
  32 +#include <linux/list.h>
  33 +#include <linux/writeback.h>
  34 +#include <linux/inotify.h>
  35 +
  36 +static atomic_t inotify_cookie;
  37 +
  38 +/*
  39 + * Lock ordering:
  40 + *
  41 + * dentry->d_lock (used to keep d_move() away from dentry->d_parent)
  42 + * iprune_mutex (synchronize shrink_icache_memory())
  43 + * inode_lock (protects the super_block->s_inodes list)
  44 + * inode->inotify_mutex (protects inode->inotify_watches and watches->i_list)
  45 + * inotify_handle->mutex (protects inotify_handle and watches->h_list)
  46 + *
  47 + * The inode->inotify_mutex and inotify_handle->mutex are held during execution
  48 + * of a caller's event handler. Thus, the caller must not hold any locks
  49 + * taken in their event handler while calling any of the published inotify
  50 + * interfaces.
  51 + */
  52 +
  53 +/*
  54 + * Lifetimes of the three main data structures--inotify_handle, inode, and
  55 + * inotify_watch--are managed by reference count.
  56 + *
  57 + * inotify_handle: Lifetime is from inotify_init() to inotify_destroy().
  58 + * Additional references can bump the count via get_inotify_handle() and drop
  59 + * the count via put_inotify_handle().
  60 + *
  61 + * inotify_watch: for inotify's purposes, lifetime is from inotify_add_watch()
  62 + * to remove_watch_no_event(). Additional references can bump the count via
  63 + * get_inotify_watch() and drop the count via put_inotify_watch(). The caller
  64 + * is responsible for the final put after receiving IN_IGNORED, or when using
  65 + * IN_ONESHOT after receiving the first event. Inotify does the final put if
  66 + * inotify_destroy() is called.
  67 + *
  68 + * inode: Pinned so long as the inode is associated with a watch, from
  69 + * inotify_add_watch() to the final put_inotify_watch().
  70 + */
  71 +
  72 +/*
  73 + * struct inotify_handle - represents an inotify instance
  74 + *
  75 + * This structure is protected by the mutex 'mutex'.
  76 + */
  77 +struct inotify_handle {
  78 + struct idr idr; /* idr mapping wd -> watch */
  79 + struct mutex mutex; /* protects this bad boy */
  80 + struct list_head watches; /* list of watches */
  81 + atomic_t count; /* reference count */
  82 + u32 last_wd; /* the last wd allocated */
  83 + const struct inotify_operations *in_ops; /* inotify caller operations */
  84 +};
  85 +
  86 +static inline void get_inotify_handle(struct inotify_handle *ih)
  87 +{
  88 + atomic_inc(&ih->count);
  89 +}
  90 +
  91 +static inline void put_inotify_handle(struct inotify_handle *ih)
  92 +{
  93 + if (atomic_dec_and_test(&ih->count)) {
  94 + idr_destroy(&ih->idr);
  95 + kfree(ih);
  96 + }
  97 +}
  98 +
  99 +/**
  100 + * get_inotify_watch - grab a reference to an inotify_watch
  101 + * @watch: watch to grab
  102 + */
  103 +void get_inotify_watch(struct inotify_watch *watch)
  104 +{
  105 + atomic_inc(&watch->count);
  106 +}
  107 +EXPORT_SYMBOL_GPL(get_inotify_watch);
  108 +
  109 +int pin_inotify_watch(struct inotify_watch *watch)
  110 +{
  111 + struct super_block *sb = watch->inode->i_sb;
  112 + spin_lock(&sb_lock);
  113 + if (sb->s_count >= S_BIAS) {
  114 + atomic_inc(&sb->s_active);
  115 + spin_unlock(&sb_lock);
  116 + atomic_inc(&watch->count);
  117 + return 1;
  118 + }
  119 + spin_unlock(&sb_lock);
  120 + return 0;
  121 +}
  122 +
  123 +/**
  124 + * put_inotify_watch - decrements the ref count on a given watch. cleans up
  125 + * watch references if the count reaches zero. inotify_watch is freed by
  126 + * inotify callers via the destroy_watch() op.
  127 + * @watch: watch to release
  128 + */
  129 +void put_inotify_watch(struct inotify_watch *watch)
  130 +{
  131 + if (atomic_dec_and_test(&watch->count)) {
  132 + struct inotify_handle *ih = watch->ih;
  133 +
  134 + iput(watch->inode);
  135 + ih->in_ops->destroy_watch(watch);
  136 + put_inotify_handle(ih);
  137 + }
  138 +}
  139 +EXPORT_SYMBOL_GPL(put_inotify_watch);
  140 +
  141 +void unpin_inotify_watch(struct inotify_watch *watch)
  142 +{
  143 + struct super_block *sb = watch->inode->i_sb;
  144 + put_inotify_watch(watch);
  145 + deactivate_super(sb);
  146 +}
  147 +
  148 +/*
  149 + * inotify_handle_get_wd - returns the next WD for use by the given handle
  150 + *
  151 + * Callers must hold ih->mutex. This function can sleep.
  152 + */
  153 +static int inotify_handle_get_wd(struct inotify_handle *ih,
  154 + struct inotify_watch *watch)
  155 +{
  156 + int ret;
  157 +
  158 + do {
  159 + if (unlikely(!idr_pre_get(&ih->idr, GFP_KERNEL)))
  160 + return -ENOSPC;
  161 + ret = idr_get_new_above(&ih->idr, watch, ih->last_wd+1, &watch->wd);
  162 + } while (ret == -EAGAIN);
  163 +
  164 + if (likely(!ret))
  165 + ih->last_wd = watch->wd;
  166 +
  167 + return ret;
  168 +}
  169 +
  170 +/*
  171 + * inotify_inode_watched - returns nonzero if there are watches on this inode
  172 + * and zero otherwise. We call this lockless; we do not care if we race.
  173 + */
  174 +static inline int inotify_inode_watched(struct inode *inode)
  175 +{
  176 + return !list_empty(&inode->inotify_watches);
  177 +}
  178 +
  179 +/*
  180 + * Get child dentry flag into sync with parent inode.
  181 + * Flag should always be clear for negative dentries.
  182 + */
  183 +static void set_dentry_child_flags(struct inode *inode, int watched)
  184 +{
  185 + struct dentry *alias;
  186 +
  187 + spin_lock(&dcache_lock);
  188 + list_for_each_entry(alias, &inode->i_dentry, d_alias) {
  189 + struct dentry *child;
  190 +
  191 + list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) {
  192 + if (!child->d_inode)
  193 + continue;
  194 +
  195 + spin_lock(&child->d_lock);
  196 + if (watched)
  197 + child->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
  198 + else
  199 + child->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED;
  200 + spin_unlock(&child->d_lock);
  201 + }
  202 + }
  203 + spin_unlock(&dcache_lock);
  204 +}
  205 +
  206 +/*
  207 + * inode_find_handle - find the watch associated with the given inode and
  208 + * handle
  209 + *
  210 + * Callers must hold inode->inotify_mutex.
  211 + */
  212 +static struct inotify_watch *inode_find_handle(struct inode *inode,
  213 + struct inotify_handle *ih)
  214 +{
  215 + struct inotify_watch *watch;
  216 +
  217 + list_for_each_entry(watch, &inode->inotify_watches, i_list) {
  218 + if (watch->ih == ih)
  219 + return watch;
  220 + }
  221 +
  222 + return NULL;
  223 +}
  224 +
  225 +/*
  226 + * remove_watch_no_event - remove watch without the IN_IGNORED event.
  227 + *
  228 + * Callers must hold both inode->inotify_mutex and ih->mutex.
  229 + */
  230 +static void remove_watch_no_event(struct inotify_watch *watch,
  231 + struct inotify_handle *ih)
  232 +{
  233 + list_del(&watch->i_list);
  234 + list_del(&watch->h_list);
  235 +
  236 + if (!inotify_inode_watched(watch->inode))
  237 + set_dentry_child_flags(watch->inode, 0);
  238 +
  239 + idr_remove(&ih->idr, watch->wd);
  240 +}
  241 +
  242 +/**
  243 + * inotify_remove_watch_locked - Remove a watch from both the handle and the
  244 + * inode. Sends the IN_IGNORED event signifying that the inode is no longer
  245 + * watched. May be invoked from a caller's event handler.
  246 + * @ih: inotify handle associated with watch
  247 + * @watch: watch to remove
  248 + *
  249 + * Callers must hold both inode->inotify_mutex and ih->mutex.
  250 + */
  251 +void inotify_remove_watch_locked(struct inotify_handle *ih,
  252 + struct inotify_watch *watch)
  253 +{
  254 + remove_watch_no_event(watch, ih);
  255 + ih->in_ops->handle_event(watch, watch->wd, IN_IGNORED, 0, NULL, NULL);
  256 +}
  257 +EXPORT_SYMBOL_GPL(inotify_remove_watch_locked);
  258 +
  259 +/* Kernel API for producing events */
  260 +
  261 +/*
  262 + * inotify_d_instantiate - instantiate dcache entry for inode
  263 + */
  264 +void inotify_d_instantiate(struct dentry *entry, struct inode *inode)
  265 +{
  266 + struct dentry *parent;
  267 +
  268 + if (!inode)
  269 + return;
  270 +
  271 + spin_lock(&entry->d_lock);
  272 + parent = entry->d_parent;
  273 + if (parent->d_inode && inotify_inode_watched(parent->d_inode))
  274 + entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
  275 + spin_unlock(&entry->d_lock);
  276 +}
  277 +
  278 +/*
  279 + * inotify_d_move - dcache entry has been moved
  280 + */
  281 +void inotify_d_move(struct dentry *entry)
  282 +{
  283 + struct dentry *parent;
  284 +
  285 + parent = entry->d_parent;
  286 + if (inotify_inode_watched(parent->d_inode))
  287 + entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
  288 + else
  289 + entry->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED;
  290 +}
  291 +
  292 +/**
  293 + * inotify_inode_queue_event - queue an event to all watches on this inode
  294 + * @inode: inode event is originating from
  295 + * @mask: event mask describing this event
  296 + * @cookie: cookie for synchronization, or zero
  297 + * @name: filename, if any
  298 + * @n_inode: inode associated with name
  299 + */
  300 +void inotify_inode_queue_event(struct inode *inode, u32 mask, u32 cookie,
  301 + const char *name, struct inode *n_inode)
  302 +{
  303 + struct inotify_watch *watch, *next;
  304 +
  305 + if (!inotify_inode_watched(inode))
  306 + return;
  307 +
  308 + mutex_lock(&inode->inotify_mutex);
  309 + list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) {
  310 + u32 watch_mask = watch->mask;
  311 + if (watch_mask & mask) {
  312 + struct inotify_handle *ih = watch->ih;
  313 + mutex_lock(&ih->mutex);
  314 + if (watch_mask & IN_ONESHOT)
  315 + remove_watch_no_event(watch, ih);
  316 + ih->in_ops->handle_event(watch, watch->wd, mask, cookie,
  317 + name, n_inode);
  318 + mutex_unlock(&ih->mutex);
  319 + }
  320 + }
  321 + mutex_unlock(&inode->inotify_mutex);
  322 +}
  323 +EXPORT_SYMBOL_GPL(inotify_inode_queue_event);
  324 +
  325 +/**
  326 + * inotify_dentry_parent_queue_event - queue an event to a dentry's parent
  327 + * @dentry: the dentry in question, we queue against this dentry's parent
  328 + * @mask: event mask describing this event
  329 + * @cookie: cookie for synchronization, or zero
  330 + * @name: filename, if any
  331 + */
  332 +void inotify_dentry_parent_queue_event(struct dentry *dentry, u32 mask,
  333 + u32 cookie, const char *name)
  334 +{
  335 + struct dentry *parent;
  336 + struct inode *inode;
  337 +
  338 + if (!(dentry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED))
  339 + return;
  340 +
  341 + spin_lock(&dentry->d_lock);
  342 + parent = dentry->d_parent;
  343 + inode = parent->d_inode;
  344 +
  345 + if (inotify_inode_watched(inode)) {
  346 + dget(parent);
  347 + spin_unlock(&dentry->d_lock);
  348 + inotify_inode_queue_event(inode, mask, cookie, name,
  349 + dentry->d_inode);
  350 + dput(parent);
  351 + } else
  352 + spin_unlock(&dentry->d_lock);
  353 +}
  354 +EXPORT_SYMBOL_GPL(inotify_dentry_parent_queue_event);
  355 +
  356 +/**
  357 + * inotify_get_cookie - return a unique cookie for use in synchronizing events.
  358 + */
  359 +u32 inotify_get_cookie(void)
  360 +{
  361 + return atomic_inc_return(&inotify_cookie);
  362 +}
  363 +EXPORT_SYMBOL_GPL(inotify_get_cookie);
  364 +
  365 +/**
  366 + * inotify_unmount_inodes - an sb is unmounting. handle any watched inodes.
  367 + * @list: list of inodes being unmounted (sb->s_inodes)
  368 + *
  369 + * Called with inode_lock held, protecting the unmounting super block's list
  370 + * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay.
  371 + * We temporarily drop inode_lock, however, and CAN block.
  372 + */
  373 +void inotify_unmount_inodes(struct list_head *list)
  374 +{
  375 + struct inode *inode, *next_i, *need_iput = NULL;
  376 +
  377 + list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
  378 + struct inotify_watch *watch, *next_w;
  379 + struct inode *need_iput_tmp;
  380 + struct list_head *watches;
  381 +
  382 + /*
  383 + * If i_count is zero, the inode cannot have any watches and
  384 + * doing an __iget/iput with MS_ACTIVE clear would actually
  385 + * evict all inodes with zero i_count from icache which is
  386 + * unnecessarily violent and may in fact be illegal to do.
  387 + */
  388 + if (!atomic_read(&inode->i_count))
  389 + continue;
  390 +
  391 + /*
  392 + * We cannot __iget() an inode in state I_CLEAR, I_FREEING, or
  393 + * I_WILL_FREE which is fine because by that point the inode
  394 + * cannot have any associated watches.
  395 + */
  396 + if (inode->i_state & (I_CLEAR | I_FREEING | I_WILL_FREE))
  397 + continue;
  398 +
  399 + need_iput_tmp = need_iput;
  400 + need_iput = NULL;
  401 + /* In case inotify_remove_watch_locked() drops a reference. */
  402 + if (inode != need_iput_tmp)
  403 + __iget(inode);
  404 + else
  405 + need_iput_tmp = NULL;
  406 + /* In case the dropping of a reference would nuke next_i. */
  407 + if ((&next_i->i_sb_list != list) &&
  408 + atomic_read(&next_i->i_count) &&
  409 + !(next_i->i_state & (I_CLEAR | I_FREEING |
  410 + I_WILL_FREE))) {
  411 + __iget(next_i);
  412 + need_iput = next_i;
  413 + }
  414 +
  415 + /*
  416 + * We can safely drop inode_lock here because we hold
  417 + * references on both inode and next_i. Also no new inodes
  418 + * will be added since the umount has begun. Finally,
  419 + * iprune_mutex keeps shrink_icache_memory() away.
  420 + */
  421 + spin_unlock(&inode_lock);
  422 +
  423 + if (need_iput_tmp)
  424 + iput(need_iput_tmp);
  425 +
  426 + /* for each watch, send IN_UNMOUNT and then remove it */
  427 + mutex_lock(&inode->inotify_mutex);
  428 + watches = &inode->inotify_watches;
  429 + list_for_each_entry_safe(watch, next_w, watches, i_list) {
  430 + struct inotify_handle *ih = watch->ih;
  431 + get_inotify_watch(watch);
  432 + mutex_lock(&ih->mutex);
  433 + ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0,
  434 + NULL, NULL);
  435 + inotify_remove_watch_locked(ih, watch);
  436 + mutex_unlock(&ih->mutex);
  437 + put_inotify_watch(watch);
  438 + }
  439 + mutex_unlock(&inode->inotify_mutex);
  440 + iput(inode);
  441 +
  442 + spin_lock(&inode_lock);
  443 + }
  444 +}
  445 +EXPORT_SYMBOL_GPL(inotify_unmount_inodes);
  446 +
  447 +/**
  448 + * inotify_inode_is_dead - an inode has been deleted, cleanup any watches
  449 + * @inode: inode that is about to be removed
  450 + */
  451 +void inotify_inode_is_dead(struct inode *inode)
  452 +{
  453 + struct inotify_watch *watch, *next;
  454 +
  455 + mutex_lock(&inode->inotify_mutex);
  456 + list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) {
  457 + struct inotify_handle *ih = watch->ih;
  458 + mutex_lock(&ih->mutex);
  459 + inotify_remove_watch_locked(ih, watch);
  460 + mutex_unlock(&ih->mutex);
  461 + }
  462 + mutex_unlock(&inode->inotify_mutex);
  463 +}
  464 +EXPORT_SYMBOL_GPL(inotify_inode_is_dead);
  465 +
  466 +/* Kernel Consumer API */
  467 +
  468 +/**
  469 + * inotify_init - allocate and initialize an inotify instance
  470 + * @ops: caller's inotify operations
  471 + */
  472 +struct inotify_handle *inotify_init(const struct inotify_operations *ops)
  473 +{
  474 + struct inotify_handle *ih;
  475 +
  476 + ih = kmalloc(sizeof(struct inotify_handle), GFP_KERNEL);
  477 + if (unlikely(!ih))
  478 + return ERR_PTR(-ENOMEM);
  479 +
  480 + idr_init(&ih->idr);
  481 + INIT_LIST_HEAD(&ih->watches);
  482 + mutex_init(&ih->mutex);
  483 + ih->last_wd = 0;
  484 + ih->in_ops = ops;
  485 + atomic_set(&ih->count, 0);
  486 + get_inotify_handle(ih);
  487 +
  488 + return ih;
  489 +}
  490 +EXPORT_SYMBOL_GPL(inotify_init);
  491 +
  492 +/**
  493 + * inotify_init_watch - initialize an inotify watch
  494 + * @watch: watch to initialize
  495 + */
  496 +void inotify_init_watch(struct inotify_watch *watch)
  497 +{
  498 + INIT_LIST_HEAD(&watch->h_list);
  499 + INIT_LIST_HEAD(&watch->i_list);
  500 + atomic_set(&watch->count, 0);
  501 + get_inotify_watch(watch); /* initial get */
  502 +}
  503 +EXPORT_SYMBOL_GPL(inotify_init_watch);
  504 +
  505 +/*
  506 + * Watch removals suck violently. To kick the watch out we need (in this
  507 + * order) inode->inotify_mutex and ih->mutex. That's fine if we have
  508 + * a hold on inode; however, for all other cases we need to make damn sure
  509 + * we don't race with umount. We can *NOT* just grab a reference to a
  510 + * watch - inotify_unmount_inodes() will happily sail past it and we'll end
  511 + * up with a reference to an inode potentially outliving its superblock. Ideally
  512 + * we just want to grab an active reference to superblock if we can; that
  513 + * will make sure we won't go into inotify_unmount_inodes() until we are
  514 + * done. Cleanup is just deactivate_super(). However, that leaves a messy
  515 + * case - what if we *are* racing with umount() and active references to
  516 + * superblock can't be acquired anymore? We can bump ->s_count, grab
  517 + * ->s_umount, which will almost certainly wait until the superblock is shut
  518 + * down and the watch in question is pining for fjords. That's fine, but
  519 + * there is a problem - we might have hit the window between ->s_active
  520 + * getting to 0 / ->s_count - below S_BIAS (i.e. the moment when superblock
  521 + * is past the point of no return and is heading for shutdown) and the
  522 + * moment when deactivate_super() acquires ->s_umount. We could just do
  523 + * drop_super() yield() and retry, but that's rather antisocial and this
  524 + * stuff is luser-triggerable. OTOH, having grabbed ->s_umount and having
  525 + * found that we'd got there first (i.e. that ->s_root is non-NULL) we know
  526 + * that we won't race with inotify_unmount_inodes(). So we could grab a
  527 + * reference to watch and do the rest as above, just with drop_super() instead
  528 + * of deactivate_super(), right? Wrong. We had to drop ih->mutex before we
  529 + * could grab ->s_umount. So the watch could've been gone already.
  530 + *
  531 + * That still can be dealt with - we need to save watch->wd, do idr_find()
  532 + * and compare its result with our pointer. If they match, we either have
  533 + * the damn thing still alive or we'd lost not one but two races at once,
  534 + * the watch had been killed and a new one got created with the same ->wd
  535 + * at the same address. That couldn't have happened in inotify_destroy(),
  536 + * but inotify_rm_wd() could run into that. Still, "new one got created"
  537 + * is not a problem - we have every right to kill it or leave it alone,
  538 + * whatever's more convenient.
  539 + *
  540 + * So we can use idr_find(...) == watch && watch->inode->i_sb == sb as
  541 + * "grab it and kill it" check. If it's been our original watch, we are
  542 + * fine, if it's a newcomer - nevermind, just pretend that we'd won the
  543 + * race and kill the fscker anyway; we are safe since we know that its
  544 + * superblock won't be going away.
  545 + *
  546 + * And yes, this is far beyond mere "not very pretty"; so's the entire
  547 + * concept of inotify to start with.
  548 + */
  549 +
  550 +/**
  551 + * pin_to_kill - pin the watch down for removal
  552 + * @ih: inotify handle
  553 + * @watch: watch to kill
  554 + *
  555 + * Called with ih->mutex held, drops it. Possible return values:
  556 + * 0 - nothing to do, it has died
  557 + * 1 - remove it, drop the reference and deactivate_super()
  558 + * 2 - remove it, drop the reference and drop_super(); we tried hard to avoid
  559 + * that variant, since it involved a lot of PITA, but that's the best that
  560 + * could've been done.
  561 + */
  562 +static int pin_to_kill(struct inotify_handle *ih, struct inotify_watch *watch)
  563 +{
  564 + struct super_block *sb = watch->inode->i_sb;
  565 + s32 wd = watch->wd;
  566 +
  567 + spin_lock(&sb_lock);
  568 + if (sb->s_count >= S_BIAS) {
  569 + atomic_inc(&sb->s_active);
  570 + spin_unlock(&sb_lock);
  571 + get_inotify_watch(watch);
  572 + mutex_unlock(&ih->mutex);
  573 + return 1; /* the best outcome */
  574 + }
  575 + sb->s_count++;
  576 + spin_unlock(&sb_lock);
  577 + mutex_unlock(&ih->mutex); /* can't grab ->s_umount under it */
  578 + down_read(&sb->s_umount);
  579 + if (likely(!sb->s_root)) {
  580 + /* fs is already shut down; the watch is dead */
  581 + drop_super(sb);
  582 + return 0;
  583 + }
  584 + /* raced with the final deactivate_super() */
  585 + mutex_lock(&ih->mutex);
  586 + if (idr_find(&ih->idr, wd) != watch || watch->inode->i_sb != sb) {
  587 + /* the watch is dead */
  588 + mutex_unlock(&ih->mutex);
  589 + drop_super(sb);
  590 + return 0;
  591 + }
  592 + /* still alive or freed and reused with the same sb and wd; kill */
  593 + get_inotify_watch(watch);
  594 + mutex_unlock(&ih->mutex);
  595 + return 2;
  596 +}
  597 +
  598 +static void unpin_and_kill(struct inotify_watch *watch, int how)
  599 +{
  600 + struct super_block *sb = watch->inode->i_sb;
  601 + put_inotify_watch(watch);
  602 + switch (how) {
  603 + case 1:
  604 + deactivate_super(sb);
  605 + break;
  606 + case 2:
  607 + drop_super(sb);
  608 + }
  609 +}
  610 +
  611 +/**
  612 + * inotify_destroy - clean up and destroy an inotify instance
  613 + * @ih: inotify handle
  614 + */
  615 +void inotify_destroy(struct inotify_handle *ih)
  616 +{
  617 + /*
  618 + * Destroy all of the watches for this handle. Unfortunately, not very
  619 + * pretty. We cannot do a simple iteration over the list, because we
  620 + * do not know the inode until we iterate to the watch. But we need to
  621 + * hold inode->inotify_mutex before ih->mutex. The following works.
  622 + *
  623 + * AV: it had to become even uglier to start working ;-/
  624 + */
  625 + while (1) {
  626 + struct inotify_watch *watch;
  627 + struct list_head *watches;
  628 + struct super_block *sb;
  629 + struct inode *inode;
  630 + int how;
  631 +
  632 + mutex_lock(&ih->mutex);
  633 + watches = &ih->watches;
  634 + if (list_empty(watches)) {
  635 + mutex_unlock(&ih->mutex);
  636 + break;
  637 + }
  638 + watch = list_first_entry(watches, struct inotify_watch, h_list);
  639 + sb = watch->inode->i_sb;
  640 + how = pin_to_kill(ih, watch);
  641 + if (!how)
  642 + continue;
  643 +
  644 + inode = watch->inode;
  645 + mutex_lock(&inode->inotify_mutex);
  646 + mutex_lock(&ih->mutex);
  647 +
  648 + /* make sure we didn't race with another list removal */
  649 + if (likely(idr_find(&ih->idr, watch->wd))) {
  650 + remove_watch_no_event(watch, ih);
  651 + put_inotify_watch(watch);
  652 + }
  653 +
  654 + mutex_unlock(&ih->mutex);
  655 + mutex_unlock(&inode->inotify_mutex);
  656 + unpin_and_kill(watch, how);
  657 + }
  658 +
  659 + /* free this handle: the put matching the get in inotify_init() */
  660 + put_inotify_handle(ih);
  661 +}
  662 +EXPORT_SYMBOL_GPL(inotify_destroy);
  663 +
  664 +/**
  665 + * inotify_find_watch - find an existing watch for an (ih,inode) pair
  666 + * @ih: inotify handle
  667 + * @inode: inode to watch
  668 + * @watchp: pointer to existing inotify_watch
  669 + *
  670 + * Caller must pin given inode (via nameidata).
  671 + */
  672 +s32 inotify_find_watch(struct inotify_handle *ih, struct inode *inode,
  673 + struct inotify_watch **watchp)
  674 +{
  675 + struct inotify_watch *old;
  676 + int ret = -ENOENT;
  677 +
  678 + mutex_lock(&inode->inotify_mutex);
  679 + mutex_lock(&ih->mutex);
  680 +
  681 + old = inode_find_handle(inode, ih);
  682 + if (unlikely(old)) {
  683 + get_inotify_watch(old); /* caller must put watch */
  684 + *watchp = old;
  685 + ret = old->wd;
  686 + }
  687 +
  688 + mutex_unlock(&ih->mutex);
  689 + mutex_unlock(&inode->inotify_mutex);
  690 +
  691 + return ret;
  692 +}
  693 +EXPORT_SYMBOL_GPL(inotify_find_watch);
  694 +
  695 +/**
  696 + * inotify_find_update_watch - find and update the mask of an existing watch
  697 + * @ih: inotify handle
  698 + * @inode: inode's watch to update
  699 + * @mask: mask of events to watch
  700 + *
  701 + * Caller must pin given inode (via nameidata).
  702 + */
  703 +s32 inotify_find_update_watch(struct inotify_handle *ih, struct inode *inode,
  704 + u32 mask)
  705 +{
  706 + struct inotify_watch *old;
  707 + int mask_add = 0;
  708 + int ret;
  709 +
  710 + if (mask & IN_MASK_ADD)
  711 + mask_add = 1;
  712 +
  713 + /* don't allow invalid bits: we don't want flags set */
  714 + mask &= IN_ALL_EVENTS | IN_ONESHOT;
  715 + if (unlikely(!mask))
  716 + return -EINVAL;
  717 +
  718 + mutex_lock(&inode->inotify_mutex);
  719 + mutex_lock(&ih->mutex);
  720 +
  721 + /*
  722 + * Handle the case of re-adding a watch on an (inode,ih) pair that we
  723 + * are already watching. We just update the mask and return its wd.
  724 + */
  725 + old = inode_find_handle(inode, ih);
  726 + if (unlikely(!old)) {
  727 + ret = -ENOENT;
  728 + goto out;
  729 + }
  730 +
  731 + if (mask_add)
  732 + old->mask |= mask;
  733 + else
  734 + old->mask = mask;
  735 + ret = old->wd;
  736 +out:
  737 + mutex_unlock(&ih->mutex);
  738 + mutex_unlock(&inode->inotify_mutex);
  739 + return ret;
  740 +}
  741 +EXPORT_SYMBOL_GPL(inotify_find_update_watch);
  742 +
  743 +/**
  744 + * inotify_add_watch - add a watch to an inotify instance
  745 + * @ih: inotify handle
  746 + * @watch: caller allocated watch structure
  747 + * @inode: inode to watch
  748 + * @mask: mask of events to watch
  749 + *
  750 + * Caller must pin given inode (via nameidata).
  751 + * Caller must ensure it only calls inotify_add_watch() once per watch.
  752 + * Calls inotify_handle_get_wd() so may sleep.
  753 + */
  754 +s32 inotify_add_watch(struct inotify_handle *ih, struct inotify_watch *watch,
  755 + struct inode *inode, u32 mask)
  756 +{
  757 + int ret = 0;
  758 + int newly_watched;
  759 +
  760 + /* don't allow invalid bits: we don't want flags set */
  761 + mask &= IN_ALL_EVENTS | IN_ONESHOT;
  762 + if (unlikely(!mask))
  763 + return -EINVAL;
  764 + watch->mask = mask;
  765 +
  766 + mutex_lock(&inode->inotify_mutex);
  767 + mutex_lock(&ih->mutex);
  768 +
  769 + /* Initialize a new watch */
  770 + ret = inotify_handle_get_wd(ih, watch);
  771 + if (unlikely(ret))
  772 + goto out;
  773 + ret = watch->wd;
  774 +
  775 + /* save a reference to handle and bump the count to make it official */
  776 + get_inotify_handle(ih);
  777 + watch->ih = ih;
  778 +
  779 + /*
  780 + * Save a reference to the inode and bump the ref count to make it
  781 + * official. We hold a reference to nameidata, which makes this safe.
  782 + */
  783 + watch->inode = igrab(inode);
  784 +
  785 + /* Add the watch to the handle's and the inode's list */
  786 + newly_watched = !inotify_inode_watched(inode);
  787 + list_add(&watch->h_list, &ih->watches);
  788 + list_add(&watch->i_list, &inode->inotify_watches);
  789 + /*
  790 + * Set child flags _after_ adding the watch, so there is no race
  791 + * window where newly instantiated children could miss their parent's
  792 + * watched flag.
  793 + */
  794 + if (newly_watched)
  795 + set_dentry_child_flags(inode, 1);
  796 +
  797 +out:
  798 + mutex_unlock(&ih->mutex);
  799 + mutex_unlock(&inode->inotify_mutex);
  800 + return ret;
  801 +}
  802 +EXPORT_SYMBOL_GPL(inotify_add_watch);
  803 +
  804 +/**
  805 + * inotify_clone_watch - put the watch next to existing one
  806 + * @old: already installed watch
  807 + * @new: new watch
  808 + *
  809 + * Caller must hold the inotify_mutex of inode we are dealing with;
  810 + * it is expected to remove the old watch before unlocking the inode.
  811 + */
  812 +s32 inotify_clone_watch(struct inotify_watch *old, struct inotify_watch *new)
  813 +{
  814 + struct inotify_handle *ih = old->ih;
  815 + int ret = 0;
  816 +
  817 + new->mask = old->mask;
  818 + new->ih = ih;
  819 +
  820 + mutex_lock(&ih->mutex);
  821 +
  822 + /* Initialize a new watch */
  823 + ret = inotify_handle_get_wd(ih, new);
  824 + if (unlikely(ret))
  825 + goto out;
  826 + ret = new->wd;
  827 +
  828 + get_inotify_handle(ih);
  829 +
  830 + new->inode = igrab(old->inode);
  831 +
  832 + list_add(&new->h_list, &ih->watches);
  833 + list_add(&new->i_list, &old->inode->inotify_watches);
  834 +out:
  835 + mutex_unlock(&ih->mutex);
  836 + return ret;
  837 +}
  838 +
  839 +void inotify_evict_watch(struct inotify_watch *watch)
  840 +{
  841 + get_inotify_watch(watch);
  842 + mutex_lock(&watch->ih->mutex);
  843 + inotify_remove_watch_locked(watch->ih, watch);
  844 + mutex_unlock(&watch->ih->mutex);
  845 +}
  846 +
  847 +/**
  848 + * inotify_rm_wd - remove a watch from an inotify instance
  849 + * @ih: inotify handle
  850 + * @wd: watch descriptor to remove
  851 + *
  852 + * Can sleep.
  853 + */
  854 +int inotify_rm_wd(struct inotify_handle *ih, u32 wd)
  855 +{
  856 + struct inotify_watch *watch;
  857 + struct super_block *sb;
  858 + struct inode *inode;
  859 + int how;
  860 +
  861 + mutex_lock(&ih->mutex);
  862 + watch = idr_find(&ih->idr, wd);
  863 + if (unlikely(!watch)) {
  864 + mutex_unlock(&ih->mutex);
  865 + return -EINVAL;
  866 + }
  867 + sb = watch->inode->i_sb;
  868 + how = pin_to_kill(ih, watch);
  869 + if (!how)
  870 + return 0;
  871 +
  872 + inode = watch->inode;
  873 +
  874 + mutex_lock(&inode->inotify_mutex);
  875 + mutex_lock(&ih->mutex);
  876 +
  877 + /* make sure that we did not race */
  878 + if (likely(idr_find(&ih->idr, wd) == watch))
  879 + inotify_remove_watch_locked(ih, watch);
  880 +
  881 + mutex_unlock(&ih->mutex);
  882 + mutex_unlock(&inode->inotify_mutex);
  883 + unpin_and_kill(watch, how);
  884 +
  885 + return 0;
  886 +}
  887 +EXPORT_SYMBOL_GPL(inotify_rm_wd);
  888 +
  889 +/**
  890 + * inotify_rm_watch - remove a watch from an inotify instance
  891 + * @ih: inotify handle
  892 + * @watch: watch to remove
  893 + *
  894 + * Can sleep.
  895 + */
  896 +int inotify_rm_watch(struct inotify_handle *ih,
  897 + struct inotify_watch *watch)
  898 +{
  899 + return inotify_rm_wd(ih, watch->wd);
  900 +}
  901 +EXPORT_SYMBOL_GPL(inotify_rm_watch);
  902 +
  903 +/*
  904 + * inotify_setup - core initialization function
  905 + */
  906 +static int __init inotify_setup(void)
  907 +{
  908 + atomic_set(&inotify_cookie, 0);
  909 +
  910 + return 0;
  911 +}
  912 +
  913 +module_init(inotify_setup);
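The functions above form the kernel-side consumer API; inotify_user.c below is its main in-tree user. As a compressed illustration of the calling pattern (a hypothetical consumer with invented names, with the watch allocated bare rather than embedded in a larger per-watch structure as real consumers do), something like the following would register a watch on an inode the caller already holds a reference to:

/* hypothetical consumer of the kernel inotify API (sketch only) */
#include <linux/inotify.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/kernel.h>

static void my_handle_event(struct inotify_watch *w, u32 wd, u32 mask,
			    u32 cookie, const char *name,
			    struct inode *n_inode)
{
	printk(KERN_INFO "inotify event 0x%x on wd %d (%s)\n",
	       mask, wd, name ? name : "");
}

static void my_destroy_watch(struct inotify_watch *w)
{
	kfree(w);	/* the final put_inotify_watch() ends up here */
}

static const struct inotify_operations my_ops = {
	.handle_event	= my_handle_event,
	.destroy_watch	= my_destroy_watch,
};

static struct inotify_handle *my_watch_inode(struct inode *inode)
{
	struct inotify_handle *ih;
	struct inotify_watch *watch;
	s32 wd;

	ih = inotify_init(&my_ops);
	if (IS_ERR(ih))
		return ih;

	watch = kzalloc(sizeof(*watch), GFP_KERNEL);
	if (!watch) {
		inotify_destroy(ih);
		return ERR_PTR(-ENOMEM);
	}

	inotify_init_watch(watch);	/* takes the initial reference */
	wd = inotify_add_watch(ih, watch, inode, IN_MODIFY | IN_ATTRIB);
	if (wd < 0) {
		/* as in create_watch(): free directly, no put, on failure */
		my_destroy_watch(watch);
		inotify_destroy(ih);
		return ERR_PTR(wd);
	}

	return ih;	/* callers eventually clean up with inotify_destroy() */
}

Note that my_handle_event() runs with inode->inotify_mutex and ih->mutex held, as documented in the lock-ordering comment at the top of the file.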
fs/notify/inotify/inotify_user.c
  1 +/*
  2 + * fs/inotify_user.c - inotify support for userspace
  3 + *
  4 + * Authors:
  5 + * John McCutchan <ttb@tentacle.dhs.org>
  6 + * Robert Love <rml@novell.com>
  7 + *
  8 + * Copyright (C) 2005 John McCutchan
  9 + * Copyright 2006 Hewlett-Packard Development Company, L.P.
  10 + *
  11 + * This program is free software; you can redistribute it and/or modify it
  12 + * under the terms of the GNU General Public License as published by the
  13 + * Free Software Foundation; either version 2, or (at your option) any
  14 + * later version.
  15 + *
  16 + * This program is distributed in the hope that it will be useful, but
  17 + * WITHOUT ANY WARRANTY; without even the implied warranty of
  18 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  19 + * General Public License for more details.
  20 + */
  21 +
  22 +#include <linux/kernel.h>
  23 +#include <linux/sched.h>
  24 +#include <linux/slab.h>
  25 +#include <linux/fs.h>
  26 +#include <linux/file.h>
  27 +#include <linux/mount.h>
  28 +#include <linux/namei.h>
  29 +#include <linux/poll.h>
  30 +#include <linux/init.h>
  31 +#include <linux/list.h>
  32 +#include <linux/inotify.h>
  33 +#include <linux/syscalls.h>
  34 +#include <linux/magic.h>
  35 +
  36 +#include <asm/ioctls.h>
  37 +
  38 +static struct kmem_cache *watch_cachep __read_mostly;
  39 +static struct kmem_cache *event_cachep __read_mostly;
  40 +
  41 +static struct vfsmount *inotify_mnt __read_mostly;
  42 +
  43 +/* these are configurable via /proc/sys/fs/inotify/ */
  44 +static int inotify_max_user_instances __read_mostly;
  45 +static int inotify_max_user_watches __read_mostly;
  46 +static int inotify_max_queued_events __read_mostly;
  47 +
  48 +/*
  49 + * Lock ordering:
  50 + *
  51 + * inotify_dev->up_mutex (ensures we don't re-add the same watch)
  52 + * inode->inotify_mutex (protects inode's watch list)
  53 + * inotify_handle->mutex (protects inotify_handle's watch list)
  54 + * inotify_dev->ev_mutex (protects device's event queue)
  55 + */
  56 +
  57 +/*
  58 + * Lifetimes of the main data structures:
  59 + *
  60 + * inotify_device: Lifetime is managed by reference count, from
  61 + * sys_inotify_init() until release. Additional references can bump the count
  62 + * via get_inotify_dev() and drop the count via put_inotify_dev().
  63 + *
  64 + * inotify_user_watch: Lifetime is from create_watch() to the receipt of an
  65 + * IN_IGNORED event from inotify, or when using IN_ONESHOT, to receipt of the
  66 + * first event, or to inotify_destroy().
  67 + */
  68 +
  69 +/*
  70 + * struct inotify_device - represents an inotify instance
  71 + *
  72 + * This structure is protected by 'ev_mutex' (event queue) and 'up_mutex' (watch updates).
  73 + */
  74 +struct inotify_device {
  75 + wait_queue_head_t wq; /* wait queue for i/o */
  76 + struct mutex ev_mutex; /* protects event queue */
  77 + struct mutex up_mutex; /* synchronizes watch updates */
  78 + struct list_head events; /* list of queued events */
  79 + struct user_struct *user; /* user who opened this dev */
  80 + struct inotify_handle *ih; /* inotify handle */
  81 + struct fasync_struct *fa; /* async notification */
  82 + atomic_t count; /* reference count */
  83 + unsigned int queue_size; /* size of the queue (bytes) */
  84 + unsigned int event_count; /* number of pending events */
  85 + unsigned int max_events; /* maximum number of events */
  86 +};
  87 +
  88 +/*
  89 + * struct inotify_kernel_event - An inotify event, originating from a watch and
  90 + * queued for user-space. A list of these is attached to each instance of the
  91 + * device. In read(), this list is walked and all events that can fit in the
  92 + * buffer are returned.
  93 + *
  94 + * Protected by dev->ev_mutex of the device in which we are queued.
  95 + */
  96 +struct inotify_kernel_event {
  97 + struct inotify_event event; /* the user-space event */
  98 + struct list_head list; /* entry in inotify_device's list */
  99 + char *name; /* filename, if any */
  100 +};
  101 +
  102 +/*
  103 + * struct inotify_user_watch - our version of an inotify_watch, we add
  104 + * a reference to the associated inotify_device.
  105 + */
  106 +struct inotify_user_watch {
  107 + struct inotify_device *dev; /* associated device */
  108 + struct inotify_watch wdata; /* inotify watch data */
  109 +};
  110 +
  111 +#ifdef CONFIG_SYSCTL
  112 +
  113 +#include <linux/sysctl.h>
  114 +
  115 +static int zero;
  116 +
  117 +ctl_table inotify_table[] = {
  118 + {
  119 + .ctl_name = INOTIFY_MAX_USER_INSTANCES,
  120 + .procname = "max_user_instances",
  121 + .data = &inotify_max_user_instances,
  122 + .maxlen = sizeof(int),
  123 + .mode = 0644,
  124 + .proc_handler = &proc_dointvec_minmax,
  125 + .strategy = &sysctl_intvec,
  126 + .extra1 = &zero,
  127 + },
  128 + {
  129 + .ctl_name = INOTIFY_MAX_USER_WATCHES,
  130 + .procname = "max_user_watches",
  131 + .data = &inotify_max_user_watches,
  132 + .maxlen = sizeof(int),
  133 + .mode = 0644,
  134 + .proc_handler = &proc_dointvec_minmax,
  135 + .strategy = &sysctl_intvec,
  136 + .extra1 = &zero,
  137 + },
  138 + {
  139 + .ctl_name = INOTIFY_MAX_QUEUED_EVENTS,
  140 + .procname = "max_queued_events",
  141 + .data = &inotify_max_queued_events,
  142 + .maxlen = sizeof(int),
  143 + .mode = 0644,
  144 + .proc_handler = &proc_dointvec_minmax,
  145 + .strategy = &sysctl_intvec,
  146 + .extra1 = &zero
  147 + },
  148 + { .ctl_name = 0 }
  149 +};
  150 +#endif /* CONFIG_SYSCTL */
  151 +
  152 +static inline void get_inotify_dev(struct inotify_device *dev)
  153 +{
  154 + atomic_inc(&dev->count);
  155 +}
  156 +
  157 +static inline void put_inotify_dev(struct inotify_device *dev)
  158 +{
  159 + if (atomic_dec_and_test(&dev->count)) {
  160 + atomic_dec(&dev->user->inotify_devs);
  161 + free_uid(dev->user);
  162 + kfree(dev);
  163 + }
  164 +}
  165 +
  166 +/*
  167 + * free_inotify_user_watch - cleans up the watch and its references
  168 + */
  169 +static void free_inotify_user_watch(struct inotify_watch *w)
  170 +{
  171 + struct inotify_user_watch *watch;
  172 + struct inotify_device *dev;
  173 +
  174 + watch = container_of(w, struct inotify_user_watch, wdata);
  175 + dev = watch->dev;
  176 +
  177 + atomic_dec(&dev->user->inotify_watches);
  178 + put_inotify_dev(dev);
  179 + kmem_cache_free(watch_cachep, watch);
  180 +}
  181 +
  182 +/*
  183 + * kernel_event - create a new kernel event with the given parameters
  184 + *
  185 + * This function can sleep.
  186 + */
  187 +static struct inotify_kernel_event * kernel_event(s32 wd, u32 mask, u32 cookie,
  188 + const char *name)
  189 +{
  190 + struct inotify_kernel_event *kevent;
  191 +
  192 + kevent = kmem_cache_alloc(event_cachep, GFP_NOFS);
  193 + if (unlikely(!kevent))
  194 + return NULL;
  195 +
  196 + /* we hand this out to user-space, so zero it just in case */
  197 + memset(&kevent->event, 0, sizeof(struct inotify_event));
  198 +
  199 + kevent->event.wd = wd;
  200 + kevent->event.mask = mask;
  201 + kevent->event.cookie = cookie;
  202 +
  203 + INIT_LIST_HEAD(&kevent->list);
  204 +
  205 + if (name) {
  206 + size_t len, rem, event_size = sizeof(struct inotify_event);
  207 +
  208 + /*
  209 + * We need to pad the filename so as to properly align an
  210 + * array of inotify_event structures. Because the structure is
  211 + * small and the common case is a small filename, we just round
  212 + * up to the next multiple of the structure's sizeof. This is
  213 + * simple and safe for all architectures.
  214 + */
  215 + len = strlen(name) + 1;
  216 + rem = event_size - len;
  217 + if (len > event_size) {
  218 + rem = event_size - (len % event_size);
  219 + if (len % event_size == 0)
  220 + rem = 0;
  221 + }
  222 +
  223 + kevent->name = kmalloc(len + rem, GFP_KERNEL);
  224 + if (unlikely(!kevent->name)) {
  225 + kmem_cache_free(event_cachep, kevent);
  226 + return NULL;
  227 + }
  228 + memcpy(kevent->name, name, len);
  229 + if (rem)
  230 + memset(kevent->name + len, 0, rem);
  231 + kevent->event.len = len + rem;
  232 + } else {
  233 + kevent->event.len = 0;
  234 + kevent->name = NULL;
  235 + }
  236 +
  237 + return kevent;
  238 +}
  239 +
  240 +/*
  241 + * inotify_dev_get_event - return the next event in the given dev's queue
  242 + *
  243 + * Caller must hold dev->ev_mutex.
  244 + */
  245 +static inline struct inotify_kernel_event *
  246 +inotify_dev_get_event(struct inotify_device *dev)
  247 +{
  248 + return list_entry(dev->events.next, struct inotify_kernel_event, list);
  249 +}
  250 +
  251 +/*
  252 + * inotify_dev_get_last_event - return the last event in the given dev's queue
  253 + *
  254 + * Caller must hold dev->ev_mutex.
  255 + */
  256 +static inline struct inotify_kernel_event *
  257 +inotify_dev_get_last_event(struct inotify_device *dev)
  258 +{
  259 + if (list_empty(&dev->events))
  260 + return NULL;
  261 + return list_entry(dev->events.prev, struct inotify_kernel_event, list);
  262 +}
  263 +
  264 +/*
  265 + * inotify_dev_queue_event - event handler registered with core inotify, adds
  266 + * a new event to the given device
  267 + *
  268 + * Can sleep (calls kernel_event()).
  269 + */
  270 +static void inotify_dev_queue_event(struct inotify_watch *w, u32 wd, u32 mask,
  271 + u32 cookie, const char *name,
  272 + struct inode *ignored)
  273 +{
  274 + struct inotify_user_watch *watch;
  275 + struct inotify_device *dev;
  276 + struct inotify_kernel_event *kevent, *last;
  277 +
  278 + watch = container_of(w, struct inotify_user_watch, wdata);
  279 + dev = watch->dev;
  280 +
  281 + mutex_lock(&dev->ev_mutex);
  282 +
  283 + /* we can safely put the watch as we don't reference it while
  284 + * generating the event
  285 + */
  286 + if (mask & IN_IGNORED || w->mask & IN_ONESHOT)
  287 + put_inotify_watch(w); /* final put */
  288 +
  289 + /* coalescing: drop this event if it is a dupe of the previous */
  290 + last = inotify_dev_get_last_event(dev);
  291 + if (last && last->event.mask == mask && last->event.wd == wd &&
  292 + last->event.cookie == cookie) {
  293 + const char *lastname = last->name;
  294 +
  295 + if (!name && !lastname)
  296 + goto out;
  297 + if (name && lastname && !strcmp(lastname, name))
  298 + goto out;
  299 + }
  300 +
  301 + /* the queue overflowed and we already sent the Q_OVERFLOW event */
  302 + if (unlikely(dev->event_count > dev->max_events))
  303 + goto out;
  304 +
  305 + /* if the queue overflows, we need to notify user space */
  306 + if (unlikely(dev->event_count == dev->max_events))
  307 + kevent = kernel_event(-1, IN_Q_OVERFLOW, cookie, NULL);
  308 + else
  309 + kevent = kernel_event(wd, mask, cookie, name);
  310 +
  311 + if (unlikely(!kevent))
  312 + goto out;
  313 +
  314 + /* queue the event and wake up anyone waiting */
  315 + dev->event_count++;
  316 + dev->queue_size += sizeof(struct inotify_event) + kevent->event.len;
  317 + list_add_tail(&kevent->list, &dev->events);
  318 + wake_up_interruptible(&dev->wq);
  319 + kill_fasync(&dev->fa, SIGIO, POLL_IN);
  320 +
  321 +out:
  322 + mutex_unlock(&dev->ev_mutex);
  323 +}
  324 +
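On the reading side, the overflow event queued above arrives as an ordinary struct inotify_event with wd set to -1 and IN_Q_OVERFLOW in the mask, meaning events were dropped. A sketch of a consumer-side check (handle_event is a hypothetical helper, not part of this patch):

#include <stdio.h>
#include <sys/inotify.h>

/* Inspect one event as produced by inotify_dev_queue_event(): a wd of -1
 * with IN_Q_OVERFLOW set means the kernel hit max_events and dropped
 * later events, so any cached directory state must be rescanned. */
static void handle_event(const struct inotify_event *ev)
{
	if (ev->mask & IN_Q_OVERFLOW) {
		fprintf(stderr, "inotify queue overflow, rescanning\n");
		return;
	}
	printf("wd=%d mask=%#x cookie=%u name=%s\n",
	       ev->wd, ev->mask, ev->cookie, ev->len ? ev->name : "");
}

Note also that identical back-to-back events are coalesced by the code above, so user space only ever sees the first of a run of duplicates.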
  325 +/*
  326 + * remove_kevent - cleans up the given kevent
  327 + *
  328 + * Caller must hold dev->ev_mutex.
  329 + */
  330 +static void remove_kevent(struct inotify_device *dev,
  331 + struct inotify_kernel_event *kevent)
  332 +{
  333 + list_del(&kevent->list);
  334 +
  335 + dev->event_count--;
  336 + dev->queue_size -= sizeof(struct inotify_event) + kevent->event.len;
  337 +}
  338 +
  339 +/*
  340 + * free_kevent - frees the given kevent.
  341 + */
  342 +static void free_kevent(struct inotify_kernel_event *kevent)
  343 +{
  344 + kfree(kevent->name);
  345 + kmem_cache_free(event_cachep, kevent);
  346 +}
  347 +
  348 +/*
  349 + * inotify_dev_event_dequeue - destroy an event on the given device
  350 + *
  351 + * Caller must hold dev->ev_mutex.
  352 + */
  353 +static void inotify_dev_event_dequeue(struct inotify_device *dev)
  354 +{
  355 + if (!list_empty(&dev->events)) {
  356 + struct inotify_kernel_event *kevent;
  357 + kevent = inotify_dev_get_event(dev);
  358 + remove_kevent(dev, kevent);
  359 + free_kevent(kevent);
  360 + }
  361 +}
  362 +
  363 +/*
  364 + * find_inode - resolve a user-given path to a specific inode
  365 + */
  366 +static int find_inode(const char __user *dirname, struct path *path,
  367 + unsigned flags)
  368 +{
  369 + int error;
  370 +
  371 + error = user_path_at(AT_FDCWD, dirname, flags, path);
  372 + if (error)
  373 + return error;
  374 + /* you can only watch an inode if you have read permissions on it */
  375 + error = inode_permission(path->dentry->d_inode, MAY_READ);
  376 + if (error)
  377 + path_put(path);
  378 + return error;
  379 +}
  380 +
  381 +/*
  382 + * create_watch - creates a watch on the given device.
  383 + *
  384 + * Callers must hold dev->up_mutex.
  385 + */
  386 +static int create_watch(struct inotify_device *dev, struct inode *inode,
  387 + u32 mask)
  388 +{
  389 + struct inotify_user_watch *watch;
  390 + int ret;
  391 +
  392 + if (atomic_read(&dev->user->inotify_watches) >=
  393 + inotify_max_user_watches)
  394 + return -ENOSPC;
  395 +
  396 + watch = kmem_cache_alloc(watch_cachep, GFP_KERNEL);
  397 + if (unlikely(!watch))
  398 + return -ENOMEM;
  399 +
  400 + /* save a reference to device and bump the count to make it official */
  401 + get_inotify_dev(dev);
  402 + watch->dev = dev;
  403 +
  404 + atomic_inc(&dev->user->inotify_watches);
  405 +
  406 + inotify_init_watch(&watch->wdata);
  407 + ret = inotify_add_watch(dev->ih, &watch->wdata, inode, mask);
  408 + if (ret < 0)
  409 + free_inotify_user_watch(&watch->wdata);
  410 +
  411 + return ret;
  412 +}
  413 +
  414 +/* Device Interface */
  415 +
  416 +static unsigned int inotify_poll(struct file *file, poll_table *wait)
  417 +{
  418 + struct inotify_device *dev = file->private_data;
  419 + int ret = 0;
  420 +
  421 + poll_wait(file, &dev->wq, wait);
  422 + mutex_lock(&dev->ev_mutex);
  423 + if (!list_empty(&dev->events))
  424 + ret = POLLIN | POLLRDNORM;
  425 + mutex_unlock(&dev->ev_mutex);
  426 +
  427 + return ret;
  428 +}
  429 +
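Since inotify_poll() reports POLLIN exactly when the event list is non-empty, the descriptor can be multiplexed like any other fd. A small sketch using poll(2) (wait_for_events is a hypothetical helper):

#include <poll.h>

/* Block for up to timeout_ms; a positive return means inotify_poll()
 * reported POLLIN, i.e. at least one event is queued on the device. */
static int wait_for_events(int fd, int timeout_ms)
{
	struct pollfd pfd = { .fd = fd, .events = POLLIN };

	return poll(&pfd, 1, timeout_ms);
}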
  430 +static ssize_t inotify_read(struct file *file, char __user *buf,
  431 + size_t count, loff_t *pos)
  432 +{
  433 + size_t event_size = sizeof (struct inotify_event);
  434 + struct inotify_device *dev;
  435 + char __user *start;
  436 + int ret;
  437 + DEFINE_WAIT(wait);
  438 +
  439 + start = buf;
  440 + dev = file->private_data;
  441 +
  442 + while (1) {
  443 +
  444 + prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE);
  445 +
  446 + mutex_lock(&dev->ev_mutex);
  447 + if (!list_empty(&dev->events)) {
  448 + ret = 0;
  449 + break;
  450 + }
  451 + mutex_unlock(&dev->ev_mutex);
  452 +
  453 + if (file->f_flags & O_NONBLOCK) {
  454 + ret = -EAGAIN;
  455 + break;
  456 + }
  457 +
  458 + if (signal_pending(current)) {
  459 + ret = -EINTR;
  460 + break;
  461 + }
  462 +
  463 + schedule();
  464 + }
  465 +
  466 + finish_wait(&dev->wq, &wait);
  467 + if (ret)
  468 + return ret;
  469 +
  470 + while (1) {
  471 + struct inotify_kernel_event *kevent;
  472 +
  473 + ret = buf - start;
  474 + if (list_empty(&dev->events))
  475 + break;
  476 +
  477 + kevent = inotify_dev_get_event(dev);
  478 + if (event_size + kevent->event.len > count) {
  479 + if (ret == 0 && count > 0) {
  480 + /*
  481 + * could not get a single event because we
  482 + * didn't have enough buffer space.
  483 + */
  484 + ret = -EINVAL;
  485 + }
  486 + break;
  487 + }
  488 + remove_kevent(dev, kevent);
  489 +
  490 + /*
  491 + * Must perform the copy_to_user outside the mutex in order
  492 + * to avoid a lock order reversal with mmap_sem.
  493 + */
  494 + mutex_unlock(&dev->ev_mutex);
  495 +
  496 + if (copy_to_user(buf, &kevent->event, event_size)) {
  497 + ret = -EFAULT;
  498 + break;
  499 + }
  500 + buf += event_size;
  501 + count -= event_size;
  502 +
  503 + if (kevent->name) {
  504 + if (copy_to_user(buf, kevent->name, kevent->event.len)){
  505 + ret = -EFAULT;
  506 + break;
  507 + }
  508 + buf += kevent->event.len;
  509 + count -= kevent->event.len;
  510 + }
  511 +
  512 + free_kevent(kevent);
  513 +
  514 + mutex_lock(&dev->ev_mutex);
  515 + }
  516 + mutex_unlock(&dev->ev_mutex);
  517 +
  518 + return ret;
  519 +}
  520 +
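The records copied out above are variable length: a fixed struct inotify_event followed by event.len bytes of NUL-padded name, and inotify_read() fails with -EINVAL rather than returning a short item if the buffer cannot hold even one event. A minimal user-space read loop stepping by that amount (drain_events is a hypothetical helper):

#include <stdio.h>
#include <sys/inotify.h>
#include <unistd.h>

/* Read and print whatever is queued on an inotify fd, walking the
 * variable-length records laid out by inotify_read(). */
static void drain_events(int fd)
{
	char buf[4096] __attribute__((aligned(__alignof__(struct inotify_event))));
	ssize_t len = read(fd, buf, sizeof(buf));
	char *p = buf;

	while (len > 0 && p < buf + len) {
		const struct inotify_event *ev = (const struct inotify_event *)p;

		printf("wd=%d mask=%#x len=%u name=%s\n",
		       ev->wd, ev->mask, ev->len, ev->len ? ev->name : "");
		p += sizeof(struct inotify_event) + ev->len;
	}
}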
  521 +static int inotify_fasync(int fd, struct file *file, int on)
  522 +{
  523 + struct inotify_device *dev = file->private_data;
  524 +
  525 + return fasync_helper(fd, file, on, &dev->fa) >= 0 ? 0 : -EIO;
  526 +}
  527 +
  528 +static int inotify_release(struct inode *ignored, struct file *file)
  529 +{
  530 + struct inotify_device *dev = file->private_data;
  531 +
  532 + inotify_destroy(dev->ih);
  533 +
  534 + /* destroy all of the events on this device */
  535 + mutex_lock(&dev->ev_mutex);
  536 + while (!list_empty(&dev->events))
  537 + inotify_dev_event_dequeue(dev);
  538 + mutex_unlock(&dev->ev_mutex);
  539 +
  540 + /* free this device: the put matching the get in inotify_init() */
  541 + put_inotify_dev(dev);
  542 +
  543 + return 0;
  544 +}
  545 +
  546 +static long inotify_ioctl(struct file *file, unsigned int cmd,
  547 + unsigned long arg)
  548 +{
  549 + struct inotify_device *dev;
  550 + void __user *p;
  551 + int ret = -ENOTTY;
  552 +
  553 + dev = file->private_data;
  554 + p = (void __user *) arg;
  555 +
  556 + switch (cmd) {
  557 + case FIONREAD:
  558 + ret = put_user(dev->queue_size, (int __user *) p);
  559 + break;
  560 + }
  561 +
  562 + return ret;
  563 +}
  564 +
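The FIONREAD case above hands back dev->queue_size, i.e. the number of bytes a read would currently find queued. A sketch of the user-space side (pending_bytes is a hypothetical helper):

#include <sys/ioctl.h>

/* Ask the inotify fd how many bytes of queued events are pending,
 * which can be used to size the buffer passed to read(). */
static int pending_bytes(int fd)
{
	int queued = 0;

	if (ioctl(fd, FIONREAD, &queued) < 0)
		return -1;
	return queued;
}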
  565 +static const struct file_operations inotify_fops = {
  566 + .poll = inotify_poll,
  567 + .read = inotify_read,
  568 + .fasync = inotify_fasync,
  569 + .release = inotify_release,
  570 + .unlocked_ioctl = inotify_ioctl,
  571 + .compat_ioctl = inotify_ioctl,
  572 +};
  573 +
  574 +static const struct inotify_operations inotify_user_ops = {
  575 + .handle_event = inotify_dev_queue_event,
  576 + .destroy_watch = free_inotify_user_watch,
  577 +};
  578 +
  579 +asmlinkage long sys_inotify_init1(int flags)
  580 +{
  581 + struct inotify_device *dev;
  582 + struct inotify_handle *ih;
  583 + struct user_struct *user;
  584 + struct file *filp;
  585 + int fd, ret;
  586 +
  587 + /* Check the IN_* constants for consistency. */
  588 + BUILD_BUG_ON(IN_CLOEXEC != O_CLOEXEC);
  589 + BUILD_BUG_ON(IN_NONBLOCK != O_NONBLOCK);
  590 +
  591 + if (flags & ~(IN_CLOEXEC | IN_NONBLOCK))
  592 + return -EINVAL;
  593 +
  594 + fd = get_unused_fd_flags(flags & O_CLOEXEC);
  595 + if (fd < 0)
  596 + return fd;
  597 +
  598 + filp = get_empty_filp();
  599 + if (!filp) {
  600 + ret = -ENFILE;
  601 + goto out_put_fd;
  602 + }
  603 +
  604 + user = get_current_user();
  605 + if (unlikely(atomic_read(&user->inotify_devs) >=
  606 + inotify_max_user_instances)) {
  607 + ret = -EMFILE;
  608 + goto out_free_uid;
  609 + }
  610 +
  611 + dev = kmalloc(sizeof(struct inotify_device), GFP_KERNEL);
  612 + if (unlikely(!dev)) {
  613 + ret = -ENOMEM;
  614 + goto out_free_uid;
  615 + }
  616 +
  617 + ih = inotify_init(&inotify_user_ops);
  618 + if (IS_ERR(ih)) {
  619 + ret = PTR_ERR(ih);
  620 + goto out_free_dev;
  621 + }
  622 + dev->ih = ih;
  623 + dev->fa = NULL;
  624 +
  625 + filp->f_op = &inotify_fops;
  626 + filp->f_path.mnt = mntget(inotify_mnt);
  627 + filp->f_path.dentry = dget(inotify_mnt->mnt_root);
  628 + filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping;
  629 + filp->f_mode = FMODE_READ;
  630 + filp->f_flags = O_RDONLY | (flags & O_NONBLOCK);
  631 + filp->private_data = dev;
  632 +
  633 + INIT_LIST_HEAD(&dev->events);
  634 + init_waitqueue_head(&dev->wq);
  635 + mutex_init(&dev->ev_mutex);
  636 + mutex_init(&dev->up_mutex);
  637 + dev->event_count = 0;
  638 + dev->queue_size = 0;
  639 + dev->max_events = inotify_max_queued_events;
  640 + dev->user = user;
  641 + atomic_set(&dev->count, 0);
  642 +
  643 + get_inotify_dev(dev);
  644 + atomic_inc(&user->inotify_devs);
  645 + fd_install(fd, filp);
  646 +
  647 + return fd;
  648 +out_free_dev:
  649 + kfree(dev);
  650 +out_free_uid:
  651 + free_uid(user);
  652 + put_filp(filp);
  653 +out_put_fd:
  654 + put_unused_fd(fd);
  655 + return ret;
  656 +}
  657 +
  658 +asmlinkage long sys_inotify_init(void)
  659 +{
  660 + return sys_inotify_init1(0);
  661 +}
  662 +
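From user space the two entry points above are reached through the usual wrappers; IN_NONBLOCK and IN_CLOEXEC map directly onto O_NONBLOCK and O_CLOEXEC, which is exactly what the BUILD_BUG_ON()s assert. A sketch, assuming a libc recent enough to expose inotify_init1():

#include <stdio.h>
#include <sys/inotify.h>

/* Create an inotify instance whose fd is close-on-exec and whose reads
 * never block; fails with EMFILE once max_user_instances is reached. */
static int make_instance(void)
{
	int fd = inotify_init1(IN_CLOEXEC | IN_NONBLOCK);

	if (fd < 0)
		perror("inotify_init1");
	return fd;
}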
  663 +asmlinkage long sys_inotify_add_watch(int fd, const char __user *pathname, u32 mask)
  664 +{
  665 + struct inode *inode;
  666 + struct inotify_device *dev;
  667 + struct path path;
  668 + struct file *filp;
  669 + int ret, fput_needed;
  670 + unsigned flags = 0;
  671 +
  672 + filp = fget_light(fd, &fput_needed);
  673 + if (unlikely(!filp))
  674 + return -EBADF;
  675 +
  676 + /* verify that this is indeed an inotify instance */
  677 + if (unlikely(filp->f_op != &inotify_fops)) {
  678 + ret = -EINVAL;
  679 + goto fput_and_out;
  680 + }
  681 +
  682 + if (!(mask & IN_DONT_FOLLOW))
  683 + flags |= LOOKUP_FOLLOW;
  684 + if (mask & IN_ONLYDIR)
  685 + flags |= LOOKUP_DIRECTORY;
  686 +
  687 + ret = find_inode(pathname, &path, flags);
  688 + if (unlikely(ret))
  689 + goto fput_and_out;
  690 +
  691 + /* inode held in place by reference to path; dev by fget on fd */
  692 + inode = path.dentry->d_inode;
  693 + dev = filp->private_data;
  694 +
  695 + mutex_lock(&dev->up_mutex);
  696 + ret = inotify_find_update_watch(dev->ih, inode, mask);
  697 + if (ret == -ENOENT)
  698 + ret = create_watch(dev, inode, mask);
  699 + mutex_unlock(&dev->up_mutex);
  700 +
  701 + path_put(&path);
  702 +fput_and_out:
  703 + fput_light(filp, fput_needed);
  704 + return ret;
  705 +}
  706 +
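Adding a watch for an inode that is already watched goes through inotify_find_update_watch() and updates the existing wd's mask rather than allocating a second watch, and create_watch() returns -ENOSPC once max_user_watches is hit. A user-space sketch (watch_dir is a hypothetical helper):

#include <stdio.h>
#include <sys/inotify.h>

/* Watch a directory for creations, deletions and renames; IN_ONLYDIR
 * makes the call fail if the path is not a directory (LOOKUP_DIRECTORY
 * above), and re-adding the same path just updates the returned wd. */
static int watch_dir(int fd, const char *path)
{
	int wd = inotify_add_watch(fd, path,
				   IN_CREATE | IN_DELETE | IN_MOVE | IN_ONLYDIR);

	if (wd < 0)
		perror("inotify_add_watch");	/* e.g. ENOSPC past max_user_watches */
	return wd;
}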
  707 +asmlinkage long sys_inotify_rm_watch(int fd, u32 wd)
  708 +{
  709 + struct file *filp;
  710 + struct inotify_device *dev;
  711 + int ret, fput_needed;
  712 +
  713 + filp = fget_light(fd, &fput_needed);
  714 + if (unlikely(!filp))
  715 + return -EBADF;
  716 +
  717 + /* verify that this is indeed an inotify instance */
  718 + if (unlikely(filp->f_op != &inotify_fops)) {
  719 + ret = -EINVAL;
  720 + goto out;
  721 + }
  722 +
  723 + dev = filp->private_data;
  724 +
  725 + /* we free our watch data when we get IN_IGNORED */
  726 + ret = inotify_rm_wd(dev->ih, wd);
  727 +
  728 +out:
  729 + fput_light(filp, fput_needed);
  730 + return ret;
  731 +}
  732 +
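As the comment above notes, the per-watch data is only freed once IN_IGNORED is delivered, so a consumer should keep its wd bookkeeping until it reads that final event. A small sketch (unwatch is a hypothetical helper):

#include <sys/inotify.h>

/* Remove a watch; the kernel queues one last IN_IGNORED event for this
 * wd, which is the point at which user-space state for the wd (e.g. a
 * wd -> path table entry) can safely be dropped. */
static void unwatch(int fd, int wd)
{
	inotify_rm_watch(fd, wd);
}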
  733 +static int
  734 +inotify_get_sb(struct file_system_type *fs_type, int flags,
  735 + const char *dev_name, void *data, struct vfsmount *mnt)
  736 +{
  737 + return get_sb_pseudo(fs_type, "inotify", NULL,
  738 + INOTIFYFS_SUPER_MAGIC, mnt);
  739 +}
  740 +
  741 +static struct file_system_type inotify_fs_type = {
  742 + .name = "inotifyfs",
  743 + .get_sb = inotify_get_sb,
  744 + .kill_sb = kill_anon_super,
  745 +};
  746 +
  747 +/*
  747 + * inotify_user_setup - Our initialization function. Note that we cannot return
  749 + * error because we have compiled-in VFS hooks. So an (unlikely) failure here
  750 + * must result in panic().
  751 + */
  752 +static int __init inotify_user_setup(void)
  753 +{
  754 + int ret;
  755 +
  756 + ret = register_filesystem(&inotify_fs_type);
  757 + if (unlikely(ret))
  758 + panic("inotify: register_filesystem returned %d!\n", ret);
  759 +
  760 + inotify_mnt = kern_mount(&inotify_fs_type);
  761 + if (IS_ERR(inotify_mnt))
  762 + panic("inotify: kern_mount ret %ld!\n", PTR_ERR(inotify_mnt));
  763 +
  764 + inotify_max_queued_events = 16384;
  765 + inotify_max_user_instances = 128;
  766 + inotify_max_user_watches = 8192;
  767 +
  768 + watch_cachep = kmem_cache_create("inotify_watch_cache",
  769 + sizeof(struct inotify_user_watch),
  770 + 0, SLAB_PANIC, NULL);
  771 + event_cachep = kmem_cache_create("inotify_event_cache",
  772 + sizeof(struct inotify_kernel_event),
  773 + 0, SLAB_PANIC, NULL);
  774 +
  775 + return 0;
  776 +}
  777 +
  778 +module_init(inotify_user_setup);
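The three defaults assigned in inotify_user_setup() are tunable at run time; assuming the standard /proc/sys/fs/inotify/ sysctls are available on the built kernel, they can be read back as below (read_inotify_limit is a hypothetical helper):

#include <stdio.h>

/* Read back one of the limits initialised in inotify_user_setup(),
 * e.g. "max_user_watches" (8192 by default here). */
static long read_inotify_limit(const char *name)
{
	char path[96];
	long val = -1;
	FILE *f;

	snprintf(path, sizeof(path), "/proc/sys/fs/inotify/%s", name);
	f = fopen(path, "r");
	if (!f)
		return -1;
	if (fscanf(f, "%ld", &val) != 1)
		val = -1;
	fclose(f);
	return val;
}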