Commit 7b2b1fee30df7e2165525cd03f7d1d01a3a56794
Committed by Linus Torvalds
1 parent fce1456a19
Exists in master and in 7 other branches
[PATCH] knfsd: knfsd: cache ipmap per TCP socket
Speed up high call-rate workloads by caching the struct ip_map for the peer on the connected struct svc_sock instead of looking it up in the ip_map cache hashtable on every call. This helps workloads using AUTH_SYS authentication over TCP.

Testing was on a 4 CPU 4 NIC Altix using 4 IRIX clients, each with 16 synthetic client threads simulating an rsync (i.e. recursive directory listing) workload reading from an i386 RH9 install image (161480 regular files in 10841 directories) on the server. That tree is small enough to fit in the server's RAM, so no disk traffic was involved. This setup gives a sustained call rate in excess of 60000 calls/sec before being CPU-bound on the server. Profiling showed strcmp(), called from ip_map_match(), was taking 4.8% of each CPU, and ip_map_lookup() was taking 2.9%. This patch drops both contributions into the profile noise.

Note that the above result overstates the value of this patch for most workloads. The synthetic clients all use separate IP addresses, so there are 64 entries in the ip_map cache hash. Because the kernel measured contained the bug fixed in commit 1f1e030bf75774b6a283518e1534d598e14147d4 and was running on a 64-bit little-endian machine, probably all 64 of those entries were on a single chain, thus increasing the cost of ip_map_lookup(). With a modern kernel you would need more clients to see the same amount of performance improvement. This patch has helped to scale knfsd to handle a deployment with 2000 NFS clients.

Signed-off-by: Greg Banks <gnb@melbourne.sgi.com>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
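In outline, the patch turns the per-call hash lookup into a check of a pointer cached on the connected socket. A minimal sketch of the resulting lookup path, condensed by the editor from ip_map_cached_get() and svcauth_unix_set_client() in the diff below; lookup_peer() is a hypothetical name, not a function added by the patch:

    /* Sketch only: condensed from the diff below. The real code inlines
     * this logic in svcauth_unix_set_client(). */
    static struct ip_map *lookup_peer(struct svc_rqst *rqstp)
    {
            struct ip_map *ipm = rqstp->rq_sock->sk_info_authunix;

            if (ipm != NULL && cache_valid(&ipm->h)) {
                    /* fast path: reuse the entry cached on this TCP socket */
                    cache_get(&ipm->h);
                    return ipm;
            }
            /* slow path: the pre-patch hash-table lookup */
            return ip_map_lookup(rqstp->rq_server->sv_program->pg_class,
                                 rqstp->rq_addr.sin_addr);
    }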
Showing 5 changed files with 61 additions and 3 deletions
include/linux/sunrpc/cache.h
1 | /* | 1 | /* |
2 | * include/linux/sunrpc/cache.h | 2 | * include/linux/sunrpc/cache.h |
3 | * | 3 | * |
4 | * Generic code for various authentication-related caches | 4 | * Generic code for various authentication-related caches |
5 | * used by sunrpc clients and servers. | 5 | * used by sunrpc clients and servers. |
6 | * | 6 | * |
7 | * Copyright (C) 2002 Neil Brown <neilb@cse.unsw.edu.au> | 7 | * Copyright (C) 2002 Neil Brown <neilb@cse.unsw.edu.au> |
8 | * | 8 | * |
9 | * Released under terms in GPL version 2. See COPYING. | 9 | * Released under terms in GPL version 2. See COPYING. |
10 | * | 10 | * |
11 | */ | 11 | */ |
12 | 12 | ||
13 | #ifndef _LINUX_SUNRPC_CACHE_H_ | 13 | #ifndef _LINUX_SUNRPC_CACHE_H_ |
14 | #define _LINUX_SUNRPC_CACHE_H_ | 14 | #define _LINUX_SUNRPC_CACHE_H_ |
15 | 15 | ||
16 | #include <linux/slab.h> | 16 | #include <linux/slab.h> |
17 | #include <asm/atomic.h> | 17 | #include <asm/atomic.h> |
18 | #include <linux/proc_fs.h> | 18 | #include <linux/proc_fs.h> |
19 | 19 | ||
20 | /* | 20 | /* |
21 | * Each cache requires: | 21 | * Each cache requires: |
22 | * - A 'struct cache_detail' which contains information specific to the cache | 22 | * - A 'struct cache_detail' which contains information specific to the cache |
23 | * for common code to use. | 23 | * for common code to use. |
24 | * - An item structure that must contain a "struct cache_head" | 24 | * - An item structure that must contain a "struct cache_head" |
25 | * - A lookup function defined using DefineCacheLookup | 25 | * - A lookup function defined using DefineCacheLookup |
26 | * - A 'put' function that can release a cache item. It will only | 26 | * - A 'put' function that can release a cache item. It will only |
27 | * be called after cache_put has succeed, so there are guarantee | 27 | * be called after cache_put has succeed, so there are guarantee |
28 | * to be no references. | 28 | * to be no references. |
29 | * - A function to calculate a hash of an item's key. | 29 | * - A function to calculate a hash of an item's key. |
30 | * | 30 | * |
31 | * as well as assorted code fragments (e.g. compare keys) and numbers | 31 | * as well as assorted code fragments (e.g. compare keys) and numbers |
32 | * (e.g. hash size, goal_age, etc). | 32 | * (e.g. hash size, goal_age, etc). |
33 | * | 33 | * |
34 | * Each cache must be registered so that it can be cleaned regularly. | 34 | * Each cache must be registered so that it can be cleaned regularly. |
35 | * When the cache is unregistered, it is flushed completely. | 35 | * When the cache is unregistered, it is flushed completely. |
36 | * | 36 | * |
37 | * Entries have a ref count and a 'hashed' flag which counts the existance | 37 | * Entries have a ref count and a 'hashed' flag which counts the existance |
38 | * in the hash table. | 38 | * in the hash table. |
39 | * We only expire entries when refcount is zero. | 39 | * We only expire entries when refcount is zero. |
40 | * Existance in the cache is counted the refcount. | 40 | * Existance in the cache is counted the refcount. |
41 | */ | 41 | */ |
42 | 42 | ||
43 | /* Every cache item has a common header that is used | 43 | /* Every cache item has a common header that is used |
44 | * for expiring and refreshing entries. | 44 | * for expiring and refreshing entries. |
45 | * | 45 | * |
46 | */ | 46 | */ |
47 | struct cache_head { | 47 | struct cache_head { |
48 | struct cache_head * next; | 48 | struct cache_head * next; |
49 | time_t expiry_time; /* After time time, don't use the data */ | 49 | time_t expiry_time; /* After time time, don't use the data */ |
50 | time_t last_refresh; /* If CACHE_PENDING, this is when upcall | 50 | time_t last_refresh; /* If CACHE_PENDING, this is when upcall |
51 | * was sent, else this is when update was received | 51 | * was sent, else this is when update was received |
52 | */ | 52 | */ |
53 | struct kref ref; | 53 | struct kref ref; |
54 | unsigned long flags; | 54 | unsigned long flags; |
55 | }; | 55 | }; |
56 | #define CACHE_VALID 0 /* Entry contains valid data */ | 56 | #define CACHE_VALID 0 /* Entry contains valid data */ |
57 | #define CACHE_NEGATIVE 1 /* Negative entry - there is no match for the key */ | 57 | #define CACHE_NEGATIVE 1 /* Negative entry - there is no match for the key */ |
58 | #define CACHE_PENDING 2 /* An upcall has been sent but no reply received yet*/ | 58 | #define CACHE_PENDING 2 /* An upcall has been sent but no reply received yet*/ |
59 | 59 | ||
60 | #define CACHE_NEW_EXPIRY 120 /* keep new things pending confirmation for 120 seconds */ | 60 | #define CACHE_NEW_EXPIRY 120 /* keep new things pending confirmation for 120 seconds */ |
61 | 61 | ||
62 | struct cache_detail { | 62 | struct cache_detail { |
63 | struct module * owner; | 63 | struct module * owner; |
64 | int hash_size; | 64 | int hash_size; |
65 | struct cache_head ** hash_table; | 65 | struct cache_head ** hash_table; |
66 | rwlock_t hash_lock; | 66 | rwlock_t hash_lock; |
67 | 67 | ||
68 | atomic_t inuse; /* active user-space update or lookup */ | 68 | atomic_t inuse; /* active user-space update or lookup */ |
69 | 69 | ||
70 | char *name; | 70 | char *name; |
71 | void (*cache_put)(struct kref *); | 71 | void (*cache_put)(struct kref *); |
72 | 72 | ||
73 | void (*cache_request)(struct cache_detail *cd, | 73 | void (*cache_request)(struct cache_detail *cd, |
74 | struct cache_head *h, | 74 | struct cache_head *h, |
75 | char **bpp, int *blen); | 75 | char **bpp, int *blen); |
76 | int (*cache_parse)(struct cache_detail *, | 76 | int (*cache_parse)(struct cache_detail *, |
77 | char *buf, int len); | 77 | char *buf, int len); |
78 | 78 | ||
79 | int (*cache_show)(struct seq_file *m, | 79 | int (*cache_show)(struct seq_file *m, |
80 | struct cache_detail *cd, | 80 | struct cache_detail *cd, |
81 | struct cache_head *h); | 81 | struct cache_head *h); |
82 | 82 | ||
83 | struct cache_head * (*alloc)(void); | 83 | struct cache_head * (*alloc)(void); |
84 | int (*match)(struct cache_head *orig, struct cache_head *new); | 84 | int (*match)(struct cache_head *orig, struct cache_head *new); |
85 | void (*init)(struct cache_head *orig, struct cache_head *new); | 85 | void (*init)(struct cache_head *orig, struct cache_head *new); |
86 | void (*update)(struct cache_head *orig, struct cache_head *new); | 86 | void (*update)(struct cache_head *orig, struct cache_head *new); |
87 | 87 | ||
88 | /* fields below this comment are for internal use | 88 | /* fields below this comment are for internal use |
89 | * and should not be touched by cache owners | 89 | * and should not be touched by cache owners |
90 | */ | 90 | */ |
91 | time_t flush_time; /* flush all cache items with last_refresh | 91 | time_t flush_time; /* flush all cache items with last_refresh |
92 | * earlier than this */ | 92 | * earlier than this */ |
93 | struct list_head others; | 93 | struct list_head others; |
94 | time_t nextcheck; | 94 | time_t nextcheck; |
95 | int entries; | 95 | int entries; |
96 | 96 | ||
97 | /* fields for communication over channel */ | 97 | /* fields for communication over channel */ |
98 | struct list_head queue; | 98 | struct list_head queue; |
99 | struct proc_dir_entry *proc_ent; | 99 | struct proc_dir_entry *proc_ent; |
100 | struct proc_dir_entry *flush_ent, *channel_ent, *content_ent; | 100 | struct proc_dir_entry *flush_ent, *channel_ent, *content_ent; |
101 | 101 | ||
102 | atomic_t readers; /* how many time is /chennel open */ | 102 | atomic_t readers; /* how many time is /chennel open */ |
103 | time_t last_close; /* if no readers, when did last close */ | 103 | time_t last_close; /* if no readers, when did last close */ |
104 | time_t last_warn; /* when we last warned about no readers */ | 104 | time_t last_warn; /* when we last warned about no readers */ |
105 | void (*warn_no_listener)(struct cache_detail *cd); | 105 | void (*warn_no_listener)(struct cache_detail *cd); |
106 | }; | 106 | }; |
107 | 107 | ||
108 | 108 | ||
109 | /* this must be embedded in any request structure that | 109 | /* this must be embedded in any request structure that |
110 | * identifies an object that will want a callback on | 110 | * identifies an object that will want a callback on |
111 | * a cache fill | 111 | * a cache fill |
112 | */ | 112 | */ |
113 | struct cache_req { | 113 | struct cache_req { |
114 | struct cache_deferred_req *(*defer)(struct cache_req *req); | 114 | struct cache_deferred_req *(*defer)(struct cache_req *req); |
115 | }; | 115 | }; |
116 | /* this must be embedded in a deferred_request that is being | 116 | /* this must be embedded in a deferred_request that is being |
117 | * delayed awaiting cache-fill | 117 | * delayed awaiting cache-fill |
118 | */ | 118 | */ |
119 | struct cache_deferred_req { | 119 | struct cache_deferred_req { |
120 | struct list_head hash; /* on hash chain */ | 120 | struct list_head hash; /* on hash chain */ |
121 | struct list_head recent; /* on fifo */ | 121 | struct list_head recent; /* on fifo */ |
122 | struct cache_head *item; /* cache item we wait on */ | 122 | struct cache_head *item; /* cache item we wait on */ |
123 | time_t recv_time; | 123 | time_t recv_time; |
124 | void *owner; /* we might need to discard all defered requests | 124 | void *owner; /* we might need to discard all defered requests |
125 | * owned by someone */ | 125 | * owned by someone */ |
126 | void (*revisit)(struct cache_deferred_req *req, | 126 | void (*revisit)(struct cache_deferred_req *req, |
127 | int too_many); | 127 | int too_many); |
128 | }; | 128 | }; |
129 | 129 | ||
130 | 130 | ||
131 | extern struct cache_head * | 131 | extern struct cache_head * |
132 | sunrpc_cache_lookup(struct cache_detail *detail, | 132 | sunrpc_cache_lookup(struct cache_detail *detail, |
133 | struct cache_head *key, int hash); | 133 | struct cache_head *key, int hash); |
134 | extern struct cache_head * | 134 | extern struct cache_head * |
135 | sunrpc_cache_update(struct cache_detail *detail, | 135 | sunrpc_cache_update(struct cache_detail *detail, |
136 | struct cache_head *new, struct cache_head *old, int hash); | 136 | struct cache_head *new, struct cache_head *old, int hash); |
137 | 137 | ||
138 | 138 | ||
139 | #define cache_for_each(pos, detail, index, member) \ | 139 | #define cache_for_each(pos, detail, index, member) \ |
140 | for (({read_lock(&(detail)->hash_lock); index = (detail)->hash_size;}) ; \ | 140 | for (({read_lock(&(detail)->hash_lock); index = (detail)->hash_size;}) ; \ |
141 | ({if (index==0)read_unlock(&(detail)->hash_lock); index--;}); \ | 141 | ({if (index==0)read_unlock(&(detail)->hash_lock); index--;}); \ |
142 | ) \ | 142 | ) \ |
143 | for (pos = container_of((detail)->hash_table[index], typeof(*pos), member); \ | 143 | for (pos = container_of((detail)->hash_table[index], typeof(*pos), member); \ |
144 | &pos->member; \ | 144 | &pos->member; \ |
145 | pos = container_of(pos->member.next, typeof(*pos), member)) | 145 | pos = container_of(pos->member.next, typeof(*pos), member)) |
146 | 146 | ||
147 | 147 | ||
148 | 148 | ||
149 | extern void cache_clean_deferred(void *owner); | 149 | extern void cache_clean_deferred(void *owner); |
150 | 150 | ||
151 | static inline struct cache_head *cache_get(struct cache_head *h) | 151 | static inline struct cache_head *cache_get(struct cache_head *h) |
152 | { | 152 | { |
153 | kref_get(&h->ref); | 153 | kref_get(&h->ref); |
154 | return h; | 154 | return h; |
155 | } | 155 | } |
156 | 156 | ||
157 | 157 | ||
158 | static inline void cache_put(struct cache_head *h, struct cache_detail *cd) | 158 | static inline void cache_put(struct cache_head *h, struct cache_detail *cd) |
159 | { | 159 | { |
160 | if (atomic_read(&h->ref.refcount) <= 2 && | 160 | if (atomic_read(&h->ref.refcount) <= 2 && |
161 | h->expiry_time < cd->nextcheck) | 161 | h->expiry_time < cd->nextcheck) |
162 | cd->nextcheck = h->expiry_time; | 162 | cd->nextcheck = h->expiry_time; |
163 | kref_put(&h->ref, cd->cache_put); | 163 | kref_put(&h->ref, cd->cache_put); |
164 | } | 164 | } |
165 | 165 | ||
166 | static inline int cache_valid(struct cache_head *h) | ||
167 | { | ||
168 | /* If an item has been unhashed pending removal when | ||
169 | * the refcount drops to 0, the expiry_time will be | ||
170 | * set to 0. We don't want to consider such items | ||
171 | * valid in this context even though CACHE_VALID is | ||
172 | * set. | ||
173 | */ | ||
174 | return (h->expiry_time != 0 && test_bit(CACHE_VALID, &h->flags)); | ||
175 | } | ||
176 | |||
166 | extern int cache_check(struct cache_detail *detail, | 177 | extern int cache_check(struct cache_detail *detail, |
167 | struct cache_head *h, struct cache_req *rqstp); | 178 | struct cache_head *h, struct cache_req *rqstp); |
168 | extern void cache_flush(void); | 179 | extern void cache_flush(void); |
169 | extern void cache_purge(struct cache_detail *detail); | 180 | extern void cache_purge(struct cache_detail *detail); |
170 | #define NEVER (0x7FFFFFFF) | 181 | #define NEVER (0x7FFFFFFF) |
171 | extern void cache_register(struct cache_detail *cd); | 182 | extern void cache_register(struct cache_detail *cd); |
172 | extern int cache_unregister(struct cache_detail *cd); | 183 | extern int cache_unregister(struct cache_detail *cd); |
173 | 184 | ||
174 | extern void qword_add(char **bpp, int *lp, char *str); | 185 | extern void qword_add(char **bpp, int *lp, char *str); |
175 | extern void qword_addhex(char **bpp, int *lp, char *buf, int blen); | 186 | extern void qword_addhex(char **bpp, int *lp, char *buf, int blen); |
176 | extern int qword_get(char **bpp, char *dest, int bufsize); | 187 | extern int qword_get(char **bpp, char *dest, int bufsize); |
177 | 188 | ||
178 | static inline int get_int(char **bpp, int *anint) | 189 | static inline int get_int(char **bpp, int *anint) |
179 | { | 190 | { |
180 | char buf[50]; | 191 | char buf[50]; |
181 | char *ep; | 192 | char *ep; |
182 | int rv; | 193 | int rv; |
183 | int len = qword_get(bpp, buf, 50); | 194 | int len = qword_get(bpp, buf, 50); |
184 | if (len < 0) return -EINVAL; | 195 | if (len < 0) return -EINVAL; |
185 | if (len ==0) return -ENOENT; | 196 | if (len ==0) return -ENOENT; |
186 | rv = simple_strtol(buf, &ep, 0); | 197 | rv = simple_strtol(buf, &ep, 0); |
187 | if (*ep) return -EINVAL; | 198 | if (*ep) return -EINVAL; |
188 | *anint = rv; | 199 | *anint = rv; |
189 | return 0; | 200 | return 0; |
190 | } | 201 | } |
191 | 202 | ||
192 | static inline time_t get_expiry(char **bpp) | 203 | static inline time_t get_expiry(char **bpp) |
193 | { | 204 | { |
194 | int rv; | 205 | int rv; |
195 | if (get_int(bpp, &rv)) | 206 | if (get_int(bpp, &rv)) |
196 | return 0; | 207 | return 0; |
197 | if (rv < 0) | 208 | if (rv < 0) |
198 | return 0; | 209 | return 0; |
199 | return rv; | 210 | return rv; |
200 | } | 211 | } |
201 | 212 | ||
202 | #endif /* _LINUX_SUNRPC_CACHE_H_ */ | 213 | #endif /* _LINUX_SUNRPC_CACHE_H_ */ |
203 | 214 |
include/linux/sunrpc/svcauth.h
1 | /* | 1 | /* |
2 | * linux/include/linux/sunrpc/svcauth.h | 2 | * linux/include/linux/sunrpc/svcauth.h |
3 | * | 3 | * |
4 | * RPC server-side authentication stuff. | 4 | * RPC server-side authentication stuff. |
5 | * | 5 | * |
6 | * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> | 6 | * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> |
7 | */ | 7 | */ |
8 | 8 | ||
9 | #ifndef _LINUX_SUNRPC_SVCAUTH_H_ | 9 | #ifndef _LINUX_SUNRPC_SVCAUTH_H_ |
10 | #define _LINUX_SUNRPC_SVCAUTH_H_ | 10 | #define _LINUX_SUNRPC_SVCAUTH_H_ |
11 | 11 | ||
12 | #ifdef __KERNEL__ | 12 | #ifdef __KERNEL__ |
13 | 13 | ||
14 | #include <linux/string.h> | 14 | #include <linux/string.h> |
15 | #include <linux/sunrpc/msg_prot.h> | 15 | #include <linux/sunrpc/msg_prot.h> |
16 | #include <linux/sunrpc/cache.h> | 16 | #include <linux/sunrpc/cache.h> |
17 | #include <linux/hash.h> | 17 | #include <linux/hash.h> |
18 | 18 | ||
19 | #define SVC_CRED_NGROUPS 32 | 19 | #define SVC_CRED_NGROUPS 32 |
20 | struct svc_cred { | 20 | struct svc_cred { |
21 | uid_t cr_uid; | 21 | uid_t cr_uid; |
22 | gid_t cr_gid; | 22 | gid_t cr_gid; |
23 | struct group_info *cr_group_info; | 23 | struct group_info *cr_group_info; |
24 | }; | 24 | }; |
25 | 25 | ||
26 | struct svc_rqst; /* forward decl */ | 26 | struct svc_rqst; /* forward decl */ |
27 | 27 | ||
28 | /* Authentication is done in the context of a domain. | 28 | /* Authentication is done in the context of a domain. |
29 | * | 29 | * |
30 | * Currently, the nfs server uses the auth_domain to stand | 30 | * Currently, the nfs server uses the auth_domain to stand |
31 | * for the "client" listed in /etc/exports. | 31 | * for the "client" listed in /etc/exports. |
32 | * | 32 | * |
33 | * More generally, a domain might represent a group of clients using | 33 | * More generally, a domain might represent a group of clients using |
34 | * a common mechanism for authentication and having a common mapping | 34 | * a common mechanism for authentication and having a common mapping |
35 | * between local identity (uid) and network identity. All clients | 35 | * between local identity (uid) and network identity. All clients |
36 | * in a domain have similar general access rights. Each domain can | 36 | * in a domain have similar general access rights. Each domain can |
37 | * contain multiple principals which will have different specific right | 37 | * contain multiple principals which will have different specific right |
38 | * based on normal Discretionary Access Control. | 38 | * based on normal Discretionary Access Control. |
39 | * | 39 | * |
40 | * A domain is created by an authentication flavour module based on name | 40 | * A domain is created by an authentication flavour module based on name |
41 | * only. Userspace then fills in detail on demand. | 41 | * only. Userspace then fills in detail on demand. |
42 | * | 42 | * |
43 | * In the case of auth_unix and auth_null, the auth_domain is also | 43 | * In the case of auth_unix and auth_null, the auth_domain is also |
44 | * associated with entries in another cache representing the mapping | 44 | * associated with entries in another cache representing the mapping |
45 | * of ip addresses to the given client. | 45 | * of ip addresses to the given client. |
46 | */ | 46 | */ |
47 | struct auth_domain { | 47 | struct auth_domain { |
48 | struct kref ref; | 48 | struct kref ref; |
49 | struct hlist_node hash; | 49 | struct hlist_node hash; |
50 | char *name; | 50 | char *name; |
51 | struct auth_ops *flavour; | 51 | struct auth_ops *flavour; |
52 | }; | 52 | }; |
53 | 53 | ||
54 | /* | 54 | /* |
55 | * Each authentication flavour registers an auth_ops | 55 | * Each authentication flavour registers an auth_ops |
56 | * structure. | 56 | * structure. |
57 | * name is simply the name. | 57 | * name is simply the name. |
58 | * flavour gives the auth flavour. It determines where the flavour is registered | 58 | * flavour gives the auth flavour. It determines where the flavour is registered |
59 | * accept() is given a request and should verify it. | 59 | * accept() is given a request and should verify it. |
60 | * It should inspect the authenticator and verifier, and possibly the data. | 60 | * It should inspect the authenticator and verifier, and possibly the data. |
61 | * If there is a problem with the authentication *authp should be set. | 61 | * If there is a problem with the authentication *authp should be set. |
62 | * The return value of accept() can indicate: | 62 | * The return value of accept() can indicate: |
63 | * OK - authorised. client and credential are set in rqstp. | 63 | * OK - authorised. client and credential are set in rqstp. |
64 | * reqbuf points to arguments | 64 | * reqbuf points to arguments |
65 | * resbuf points to good place for results. verfier | 65 | * resbuf points to good place for results. verfier |
66 | * is (probably) already in place. Certainly space is | 66 | * is (probably) already in place. Certainly space is |
67 | * reserved for it. | 67 | * reserved for it. |
68 | * DROP - simply drop the request. It may have been deferred | 68 | * DROP - simply drop the request. It may have been deferred |
69 | * GARBAGE - rpc garbage_args error | 69 | * GARBAGE - rpc garbage_args error |
70 | * SYSERR - rpc system_err error | 70 | * SYSERR - rpc system_err error |
71 | * DENIED - authp holds reason for denial. | 71 | * DENIED - authp holds reason for denial. |
72 | * COMPLETE - the reply is encoded already and ready to be sent; no | 72 | * COMPLETE - the reply is encoded already and ready to be sent; no |
73 | * further processing is necessary. (This is used for processing | 73 | * further processing is necessary. (This is used for processing |
74 | * null procedure calls which are used to set up encryption | 74 | * null procedure calls which are used to set up encryption |
75 | * contexts.) | 75 | * contexts.) |
76 | * | 76 | * |
77 | * accept is passed the proc number so that it can accept NULL rpc requests | 77 | * accept is passed the proc number so that it can accept NULL rpc requests |
78 | * even if it cannot authenticate the client (as is sometimes appropriate). | 78 | * even if it cannot authenticate the client (as is sometimes appropriate). |
79 | * | 79 | * |
80 | * release() is given a request after the procedure has been run. | 80 | * release() is given a request after the procedure has been run. |
81 | * It should sign/encrypt the results if needed | 81 | * It should sign/encrypt the results if needed |
82 | * It should return: | 82 | * It should return: |
83 | * OK - the resbuf is ready to be sent | 83 | * OK - the resbuf is ready to be sent |
84 | * DROP - the reply should be quitely dropped | 84 | * DROP - the reply should be quitely dropped |
85 | * DENIED - authp holds a reason for MSG_DENIED | 85 | * DENIED - authp holds a reason for MSG_DENIED |
86 | * SYSERR - rpc system_err | 86 | * SYSERR - rpc system_err |
87 | * | 87 | * |
88 | * domain_release() | 88 | * domain_release() |
89 | * This call releases a domain. | 89 | * This call releases a domain. |
90 | * set_client() | 90 | * set_client() |
91 | * Givens a pending request (struct svc_rqst), finds and assigns | 91 | * Givens a pending request (struct svc_rqst), finds and assigns |
92 | * an appropriate 'auth_domain' as the client. | 92 | * an appropriate 'auth_domain' as the client. |
93 | */ | 93 | */ |
94 | struct auth_ops { | 94 | struct auth_ops { |
95 | char * name; | 95 | char * name; |
96 | struct module *owner; | 96 | struct module *owner; |
97 | int flavour; | 97 | int flavour; |
98 | int (*accept)(struct svc_rqst *rq, __be32 *authp); | 98 | int (*accept)(struct svc_rqst *rq, __be32 *authp); |
99 | int (*release)(struct svc_rqst *rq); | 99 | int (*release)(struct svc_rqst *rq); |
100 | void (*domain_release)(struct auth_domain *); | 100 | void (*domain_release)(struct auth_domain *); |
101 | int (*set_client)(struct svc_rqst *rq); | 101 | int (*set_client)(struct svc_rqst *rq); |
102 | }; | 102 | }; |
103 | 103 | ||
104 | #define SVC_GARBAGE 1 | 104 | #define SVC_GARBAGE 1 |
105 | #define SVC_SYSERR 2 | 105 | #define SVC_SYSERR 2 |
106 | #define SVC_VALID 3 | 106 | #define SVC_VALID 3 |
107 | #define SVC_NEGATIVE 4 | 107 | #define SVC_NEGATIVE 4 |
108 | #define SVC_OK 5 | 108 | #define SVC_OK 5 |
109 | #define SVC_DROP 6 | 109 | #define SVC_DROP 6 |
110 | #define SVC_DENIED 7 | 110 | #define SVC_DENIED 7 |
111 | #define SVC_PENDING 8 | 111 | #define SVC_PENDING 8 |
112 | #define SVC_COMPLETE 9 | 112 | #define SVC_COMPLETE 9 |
113 | 113 | ||
114 | 114 | ||
115 | extern int svc_authenticate(struct svc_rqst *rqstp, __be32 *authp); | 115 | extern int svc_authenticate(struct svc_rqst *rqstp, __be32 *authp); |
116 | extern int svc_authorise(struct svc_rqst *rqstp); | 116 | extern int svc_authorise(struct svc_rqst *rqstp); |
117 | extern int svc_set_client(struct svc_rqst *rqstp); | 117 | extern int svc_set_client(struct svc_rqst *rqstp); |
118 | extern int svc_auth_register(rpc_authflavor_t flavor, struct auth_ops *aops); | 118 | extern int svc_auth_register(rpc_authflavor_t flavor, struct auth_ops *aops); |
119 | extern void svc_auth_unregister(rpc_authflavor_t flavor); | 119 | extern void svc_auth_unregister(rpc_authflavor_t flavor); |
120 | 120 | ||
121 | extern struct auth_domain *unix_domain_find(char *name); | 121 | extern struct auth_domain *unix_domain_find(char *name); |
122 | extern void auth_domain_put(struct auth_domain *item); | 122 | extern void auth_domain_put(struct auth_domain *item); |
123 | extern int auth_unix_add_addr(struct in_addr addr, struct auth_domain *dom); | 123 | extern int auth_unix_add_addr(struct in_addr addr, struct auth_domain *dom); |
124 | extern struct auth_domain *auth_domain_lookup(char *name, struct auth_domain *new); | 124 | extern struct auth_domain *auth_domain_lookup(char *name, struct auth_domain *new); |
125 | extern struct auth_domain *auth_domain_find(char *name); | 125 | extern struct auth_domain *auth_domain_find(char *name); |
126 | extern struct auth_domain *auth_unix_lookup(struct in_addr addr); | 126 | extern struct auth_domain *auth_unix_lookup(struct in_addr addr); |
127 | extern int auth_unix_forget_old(struct auth_domain *dom); | 127 | extern int auth_unix_forget_old(struct auth_domain *dom); |
128 | extern void svcauth_unix_purge(void); | 128 | extern void svcauth_unix_purge(void); |
129 | extern void svcauth_unix_info_release(void *); | ||
129 | 130 | ||
130 | static inline unsigned long hash_str(char *name, int bits) | 131 | static inline unsigned long hash_str(char *name, int bits) |
131 | { | 132 | { |
132 | unsigned long hash = 0; | 133 | unsigned long hash = 0; |
133 | unsigned long l = 0; | 134 | unsigned long l = 0; |
134 | int len = 0; | 135 | int len = 0; |
135 | unsigned char c; | 136 | unsigned char c; |
136 | do { | 137 | do { |
137 | if (unlikely(!(c = *name++))) { | 138 | if (unlikely(!(c = *name++))) { |
138 | c = (char)len; len = -1; | 139 | c = (char)len; len = -1; |
139 | } | 140 | } |
140 | l = (l << 8) | c; | 141 | l = (l << 8) | c; |
141 | len++; | 142 | len++; |
142 | if ((len & (BITS_PER_LONG/8-1))==0) | 143 | if ((len & (BITS_PER_LONG/8-1))==0) |
143 | hash = hash_long(hash^l, BITS_PER_LONG); | 144 | hash = hash_long(hash^l, BITS_PER_LONG); |
144 | } while (len); | 145 | } while (len); |
145 | return hash >> (BITS_PER_LONG - bits); | 146 | return hash >> (BITS_PER_LONG - bits); |
146 | } | 147 | } |
147 | 148 | ||
148 | static inline unsigned long hash_mem(char *buf, int length, int bits) | 149 | static inline unsigned long hash_mem(char *buf, int length, int bits) |
149 | { | 150 | { |
150 | unsigned long hash = 0; | 151 | unsigned long hash = 0; |
151 | unsigned long l = 0; | 152 | unsigned long l = 0; |
152 | int len = 0; | 153 | int len = 0; |
153 | unsigned char c; | 154 | unsigned char c; |
154 | do { | 155 | do { |
155 | if (len == length) { | 156 | if (len == length) { |
156 | c = (char)len; len = -1; | 157 | c = (char)len; len = -1; |
157 | } else | 158 | } else |
158 | c = *buf++; | 159 | c = *buf++; |
159 | l = (l << 8) | c; | 160 | l = (l << 8) | c; |
160 | len++; | 161 | len++; |
161 | if ((len & (BITS_PER_LONG/8-1))==0) | 162 | if ((len & (BITS_PER_LONG/8-1))==0) |
162 | hash = hash_long(hash^l, BITS_PER_LONG); | 163 | hash = hash_long(hash^l, BITS_PER_LONG); |
163 | } while (len); | 164 | } while (len); |
164 | return hash >> (BITS_PER_LONG - bits); | 165 | return hash >> (BITS_PER_LONG - bits); |
165 | } | 166 | } |
166 | 167 | ||
167 | #endif /* __KERNEL__ */ | 168 | #endif /* __KERNEL__ */ |
168 | 169 | ||
169 | #endif /* _LINUX_SUNRPC_SVCAUTH_H_ */ | 170 | #endif /* _LINUX_SUNRPC_SVCAUTH_H_ */ |
170 | 171 |
include/linux/sunrpc/svcsock.h
1 | /* | 1 | /* |
2 | * linux/include/linux/sunrpc/svcsock.h | 2 | * linux/include/linux/sunrpc/svcsock.h |
3 | * | 3 | * |
4 | * RPC server socket I/O. | 4 | * RPC server socket I/O. |
5 | * | 5 | * |
6 | * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> | 6 | * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> |
7 | */ | 7 | */ |
8 | 8 | ||
9 | #ifndef SUNRPC_SVCSOCK_H | 9 | #ifndef SUNRPC_SVCSOCK_H |
10 | #define SUNRPC_SVCSOCK_H | 10 | #define SUNRPC_SVCSOCK_H |
11 | 11 | ||
12 | #include <linux/sunrpc/svc.h> | 12 | #include <linux/sunrpc/svc.h> |
13 | 13 | ||
14 | /* | 14 | /* |
15 | * RPC server socket. | 15 | * RPC server socket. |
16 | */ | 16 | */ |
17 | struct svc_sock { | 17 | struct svc_sock { |
18 | struct list_head sk_ready; /* list of ready sockets */ | 18 | struct list_head sk_ready; /* list of ready sockets */ |
19 | struct list_head sk_list; /* list of all sockets */ | 19 | struct list_head sk_list; /* list of all sockets */ |
20 | struct socket * sk_sock; /* berkeley socket layer */ | 20 | struct socket * sk_sock; /* berkeley socket layer */ |
21 | struct sock * sk_sk; /* INET layer */ | 21 | struct sock * sk_sk; /* INET layer */ |
22 | 22 | ||
23 | struct svc_pool * sk_pool; /* current pool iff queued */ | 23 | struct svc_pool * sk_pool; /* current pool iff queued */ |
24 | struct svc_serv * sk_server; /* service for this socket */ | 24 | struct svc_serv * sk_server; /* service for this socket */ |
25 | atomic_t sk_inuse; /* use count */ | 25 | atomic_t sk_inuse; /* use count */ |
26 | unsigned long sk_flags; | 26 | unsigned long sk_flags; |
27 | #define SK_BUSY 0 /* enqueued/receiving */ | 27 | #define SK_BUSY 0 /* enqueued/receiving */ |
28 | #define SK_CONN 1 /* conn pending */ | 28 | #define SK_CONN 1 /* conn pending */ |
29 | #define SK_CLOSE 2 /* dead or dying */ | 29 | #define SK_CLOSE 2 /* dead or dying */ |
30 | #define SK_DATA 3 /* data pending */ | 30 | #define SK_DATA 3 /* data pending */ |
31 | #define SK_TEMP 4 /* temp (TCP) socket */ | 31 | #define SK_TEMP 4 /* temp (TCP) socket */ |
32 | #define SK_DEAD 6 /* socket closed */ | 32 | #define SK_DEAD 6 /* socket closed */ |
33 | #define SK_CHNGBUF 7 /* need to change snd/rcv buffer sizes */ | 33 | #define SK_CHNGBUF 7 /* need to change snd/rcv buffer sizes */ |
34 | #define SK_DEFERRED 8 /* request on sk_deferred */ | 34 | #define SK_DEFERRED 8 /* request on sk_deferred */ |
35 | #define SK_OLD 9 /* used for temp socket aging mark+sweep */ | 35 | #define SK_OLD 9 /* used for temp socket aging mark+sweep */ |
36 | #define SK_DETACHED 10 /* detached from tempsocks list */ | 36 | #define SK_DETACHED 10 /* detached from tempsocks list */ |
37 | 37 | ||
38 | atomic_t sk_reserved; /* space on outq that is reserved */ | 38 | atomic_t sk_reserved; /* space on outq that is reserved */ |
39 | 39 | ||
40 | spinlock_t sk_defer_lock; /* protects sk_deferred */ | 40 | spinlock_t sk_defer_lock; /* protects sk_deferred */ |
41 | struct list_head sk_deferred; /* deferred requests that need to | 41 | struct list_head sk_deferred; /* deferred requests that need to |
42 | * be revisted */ | 42 | * be revisted */ |
43 | struct mutex sk_mutex; /* to serialize sending data */ | 43 | struct mutex sk_mutex; /* to serialize sending data */ |
44 | 44 | ||
45 | int (*sk_recvfrom)(struct svc_rqst *rqstp); | 45 | int (*sk_recvfrom)(struct svc_rqst *rqstp); |
46 | int (*sk_sendto)(struct svc_rqst *rqstp); | 46 | int (*sk_sendto)(struct svc_rqst *rqstp); |
47 | 47 | ||
48 | /* We keep the old state_change and data_ready CB's here */ | 48 | /* We keep the old state_change and data_ready CB's here */ |
49 | void (*sk_ostate)(struct sock *); | 49 | void (*sk_ostate)(struct sock *); |
50 | void (*sk_odata)(struct sock *, int bytes); | 50 | void (*sk_odata)(struct sock *, int bytes); |
51 | void (*sk_owspace)(struct sock *); | 51 | void (*sk_owspace)(struct sock *); |
52 | 52 | ||
53 | /* private TCP part */ | 53 | /* private TCP part */ |
54 | int sk_reclen; /* length of record */ | 54 | int sk_reclen; /* length of record */ |
55 | int sk_tcplen; /* current read length */ | 55 | int sk_tcplen; /* current read length */ |
56 | time_t sk_lastrecv; /* time of last received request */ | 56 | time_t sk_lastrecv; /* time of last received request */ |
57 | |||
58 | /* cache of various info for TCP sockets */ | ||
59 | void *sk_info_authunix; | ||
57 | }; | 60 | }; |
58 | 61 | ||
59 | /* | 62 | /* |
60 | * Function prototypes. | 63 | * Function prototypes. |
61 | */ | 64 | */ |
62 | int svc_makesock(struct svc_serv *, int, unsigned short); | 65 | int svc_makesock(struct svc_serv *, int, unsigned short); |
63 | void svc_delete_socket(struct svc_sock *); | 66 | void svc_delete_socket(struct svc_sock *); |
64 | int svc_recv(struct svc_rqst *, long); | 67 | int svc_recv(struct svc_rqst *, long); |
65 | int svc_send(struct svc_rqst *); | 68 | int svc_send(struct svc_rqst *); |
66 | void svc_drop(struct svc_rqst *); | 69 | void svc_drop(struct svc_rqst *); |
67 | void svc_sock_update_bufs(struct svc_serv *serv); | 70 | void svc_sock_update_bufs(struct svc_serv *serv); |
68 | int svc_sock_names(char *buf, struct svc_serv *serv, char *toclose); | 71 | int svc_sock_names(char *buf, struct svc_serv *serv, char *toclose); |
69 | int svc_addsock(struct svc_serv *serv, | 72 | int svc_addsock(struct svc_serv *serv, |
70 | int fd, | 73 | int fd, |
71 | char *name_return, | 74 | char *name_return, |
72 | int *proto); | 75 | int *proto); |
73 | 76 | ||
74 | #endif /* SUNRPC_SVCSOCK_H */ | 77 | #endif /* SUNRPC_SVCSOCK_H */ |
75 | 78 |
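Two design points are worth noting here (editor's reading, not stated in the commit): sk_info_authunix is a void * rather than a struct ip_map * so that svcsock.h need not know about svcauth internals, and svcauth_unix_info_release(), declared in svcauth.h above, gives the socket code a typed way to drop the cached reference. The fifth changed file, net/sunrpc/svcsock.c, is not shown on this page; presumably it releases the cached entry during socket teardown, roughly:

    /* Hypothetical sketch of the svcsock.c hunk not shown above:
     * drop the cached ip_map reference when the socket is destroyed. */
    if (svsk->sk_info_authunix != NULL)
            svcauth_unix_info_release(svsk->sk_info_authunix);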
net/sunrpc/svcauth_unix.c
1 | #include <linux/types.h> | 1 | #include <linux/types.h> |
2 | #include <linux/sched.h> | 2 | #include <linux/sched.h> |
3 | #include <linux/module.h> | 3 | #include <linux/module.h> |
4 | #include <linux/sunrpc/types.h> | 4 | #include <linux/sunrpc/types.h> |
5 | #include <linux/sunrpc/xdr.h> | 5 | #include <linux/sunrpc/xdr.h> |
6 | #include <linux/sunrpc/svcsock.h> | 6 | #include <linux/sunrpc/svcsock.h> |
7 | #include <linux/sunrpc/svcauth.h> | 7 | #include <linux/sunrpc/svcauth.h> |
8 | #include <linux/err.h> | 8 | #include <linux/err.h> |
9 | #include <linux/seq_file.h> | 9 | #include <linux/seq_file.h> |
10 | #include <linux/hash.h> | 10 | #include <linux/hash.h> |
11 | #include <linux/string.h> | 11 | #include <linux/string.h> |
12 | #include <net/sock.h> | ||
12 | 13 | ||
13 | #define RPCDBG_FACILITY RPCDBG_AUTH | 14 | #define RPCDBG_FACILITY RPCDBG_AUTH |
14 | 15 | ||
15 | 16 | ||
16 | /* | 17 | /* |
17 | * AUTHUNIX and AUTHNULL credentials are both handled here. | 18 | * AUTHUNIX and AUTHNULL credentials are both handled here. |
18 | * AUTHNULL is treated just like AUTHUNIX except that the uid/gid | 19 | * AUTHNULL is treated just like AUTHUNIX except that the uid/gid |
19 | * are always nobody (-2). i.e. we do the same IP address checks for | 20 | * are always nobody (-2). i.e. we do the same IP address checks for |
20 | * AUTHNULL as for AUTHUNIX, and that is done here. | 21 | * AUTHNULL as for AUTHUNIX, and that is done here. |
21 | */ | 22 | */ |
22 | 23 | ||
23 | 24 | ||
24 | struct unix_domain { | 25 | struct unix_domain { |
25 | struct auth_domain h; | 26 | struct auth_domain h; |
26 | int addr_changes; | 27 | int addr_changes; |
27 | /* other stuff later */ | 28 | /* other stuff later */ |
28 | }; | 29 | }; |
29 | 30 | ||
30 | extern struct auth_ops svcauth_unix; | 31 | extern struct auth_ops svcauth_unix; |
31 | 32 | ||
32 | struct auth_domain *unix_domain_find(char *name) | 33 | struct auth_domain *unix_domain_find(char *name) |
33 | { | 34 | { |
34 | struct auth_domain *rv; | 35 | struct auth_domain *rv; |
35 | struct unix_domain *new = NULL; | 36 | struct unix_domain *new = NULL; |
36 | 37 | ||
37 | rv = auth_domain_lookup(name, NULL); | 38 | rv = auth_domain_lookup(name, NULL); |
38 | while(1) { | 39 | while(1) { |
39 | if (rv) { | 40 | if (rv) { |
40 | if (new && rv != &new->h) | 41 | if (new && rv != &new->h) |
41 | auth_domain_put(&new->h); | 42 | auth_domain_put(&new->h); |
42 | 43 | ||
43 | if (rv->flavour != &svcauth_unix) { | 44 | if (rv->flavour != &svcauth_unix) { |
44 | auth_domain_put(rv); | 45 | auth_domain_put(rv); |
45 | return NULL; | 46 | return NULL; |
46 | } | 47 | } |
47 | return rv; | 48 | return rv; |
48 | } | 49 | } |
49 | 50 | ||
50 | new = kmalloc(sizeof(*new), GFP_KERNEL); | 51 | new = kmalloc(sizeof(*new), GFP_KERNEL); |
51 | if (new == NULL) | 52 | if (new == NULL) |
52 | return NULL; | 53 | return NULL; |
53 | kref_init(&new->h.ref); | 54 | kref_init(&new->h.ref); |
54 | new->h.name = kstrdup(name, GFP_KERNEL); | 55 | new->h.name = kstrdup(name, GFP_KERNEL); |
55 | new->h.flavour = &svcauth_unix; | 56 | new->h.flavour = &svcauth_unix; |
56 | new->addr_changes = 0; | 57 | new->addr_changes = 0; |
57 | rv = auth_domain_lookup(name, &new->h); | 58 | rv = auth_domain_lookup(name, &new->h); |
58 | } | 59 | } |
59 | } | 60 | } |
60 | 61 | ||
61 | static void svcauth_unix_domain_release(struct auth_domain *dom) | 62 | static void svcauth_unix_domain_release(struct auth_domain *dom) |
62 | { | 63 | { |
63 | struct unix_domain *ud = container_of(dom, struct unix_domain, h); | 64 | struct unix_domain *ud = container_of(dom, struct unix_domain, h); |
64 | 65 | ||
65 | kfree(dom->name); | 66 | kfree(dom->name); |
66 | kfree(ud); | 67 | kfree(ud); |
67 | } | 68 | } |
68 | 69 | ||
69 | 70 | ||
70 | /************************************************** | 71 | /************************************************** |
71 | * cache for IP address to unix_domain | 72 | * cache for IP address to unix_domain |
72 | * as needed by AUTH_UNIX | 73 | * as needed by AUTH_UNIX |
73 | */ | 74 | */ |
74 | #define IP_HASHBITS 8 | 75 | #define IP_HASHBITS 8 |
75 | #define IP_HASHMAX (1<<IP_HASHBITS) | 76 | #define IP_HASHMAX (1<<IP_HASHBITS) |
76 | #define IP_HASHMASK (IP_HASHMAX-1) | 77 | #define IP_HASHMASK (IP_HASHMAX-1) |
77 | 78 | ||
78 | struct ip_map { | 79 | struct ip_map { |
79 | struct cache_head h; | 80 | struct cache_head h; |
80 | char m_class[8]; /* e.g. "nfsd" */ | 81 | char m_class[8]; /* e.g. "nfsd" */ |
81 | struct in_addr m_addr; | 82 | struct in_addr m_addr; |
82 | struct unix_domain *m_client; | 83 | struct unix_domain *m_client; |
83 | int m_add_change; | 84 | int m_add_change; |
84 | }; | 85 | }; |
85 | static struct cache_head *ip_table[IP_HASHMAX]; | 86 | static struct cache_head *ip_table[IP_HASHMAX]; |
86 | 87 | ||
87 | static void ip_map_put(struct kref *kref) | 88 | static void ip_map_put(struct kref *kref) |
88 | { | 89 | { |
89 | struct cache_head *item = container_of(kref, struct cache_head, ref); | 90 | struct cache_head *item = container_of(kref, struct cache_head, ref); |
90 | struct ip_map *im = container_of(item, struct ip_map,h); | 91 | struct ip_map *im = container_of(item, struct ip_map,h); |
91 | 92 | ||
92 | if (test_bit(CACHE_VALID, &item->flags) && | 93 | if (test_bit(CACHE_VALID, &item->flags) && |
93 | !test_bit(CACHE_NEGATIVE, &item->flags)) | 94 | !test_bit(CACHE_NEGATIVE, &item->flags)) |
94 | auth_domain_put(&im->m_client->h); | 95 | auth_domain_put(&im->m_client->h); |
95 | kfree(im); | 96 | kfree(im); |
96 | } | 97 | } |
97 | 98 | ||
98 | #if IP_HASHBITS == 8 | 99 | #if IP_HASHBITS == 8 |
99 | /* hash_long on a 64 bit machine is currently REALLY BAD for | 100 | /* hash_long on a 64 bit machine is currently REALLY BAD for |
100 | * IP addresses in reverse-endian (i.e. on a little-endian machine). | 101 | * IP addresses in reverse-endian (i.e. on a little-endian machine). |
101 | * So use a trivial but reliable hash instead | 102 | * So use a trivial but reliable hash instead |
102 | */ | 103 | */ |
103 | static inline int hash_ip(unsigned long ip) | 104 | static inline int hash_ip(unsigned long ip) |
104 | { | 105 | { |
105 | int hash = ip ^ (ip>>16); | 106 | int hash = ip ^ (ip>>16); |
106 | return (hash ^ (hash>>8)) & 0xff; | 107 | return (hash ^ (hash>>8)) & 0xff; |
107 | } | 108 | } |
108 | #endif | 109 | #endif |
109 | static int ip_map_match(struct cache_head *corig, struct cache_head *cnew) | 110 | static int ip_map_match(struct cache_head *corig, struct cache_head *cnew) |
110 | { | 111 | { |
111 | struct ip_map *orig = container_of(corig, struct ip_map, h); | 112 | struct ip_map *orig = container_of(corig, struct ip_map, h); |
112 | struct ip_map *new = container_of(cnew, struct ip_map, h); | 113 | struct ip_map *new = container_of(cnew, struct ip_map, h); |
113 | return strcmp(orig->m_class, new->m_class) == 0 | 114 | return strcmp(orig->m_class, new->m_class) == 0 |
114 | && orig->m_addr.s_addr == new->m_addr.s_addr; | 115 | && orig->m_addr.s_addr == new->m_addr.s_addr; |
115 | } | 116 | } |
116 | static void ip_map_init(struct cache_head *cnew, struct cache_head *citem) | 117 | static void ip_map_init(struct cache_head *cnew, struct cache_head *citem) |
117 | { | 118 | { |
118 | struct ip_map *new = container_of(cnew, struct ip_map, h); | 119 | struct ip_map *new = container_of(cnew, struct ip_map, h); |
119 | struct ip_map *item = container_of(citem, struct ip_map, h); | 120 | struct ip_map *item = container_of(citem, struct ip_map, h); |
120 | 121 | ||
121 | strcpy(new->m_class, item->m_class); | 122 | strcpy(new->m_class, item->m_class); |
122 | new->m_addr.s_addr = item->m_addr.s_addr; | 123 | new->m_addr.s_addr = item->m_addr.s_addr; |
123 | } | 124 | } |
124 | static void update(struct cache_head *cnew, struct cache_head *citem) | 125 | static void update(struct cache_head *cnew, struct cache_head *citem) |
125 | { | 126 | { |
126 | struct ip_map *new = container_of(cnew, struct ip_map, h); | 127 | struct ip_map *new = container_of(cnew, struct ip_map, h); |
127 | struct ip_map *item = container_of(citem, struct ip_map, h); | 128 | struct ip_map *item = container_of(citem, struct ip_map, h); |
128 | 129 | ||
129 | kref_get(&item->m_client->h.ref); | 130 | kref_get(&item->m_client->h.ref); |
130 | new->m_client = item->m_client; | 131 | new->m_client = item->m_client; |
131 | new->m_add_change = item->m_add_change; | 132 | new->m_add_change = item->m_add_change; |
132 | } | 133 | } |
133 | static struct cache_head *ip_map_alloc(void) | 134 | static struct cache_head *ip_map_alloc(void) |
134 | { | 135 | { |
135 | struct ip_map *i = kmalloc(sizeof(*i), GFP_KERNEL); | 136 | struct ip_map *i = kmalloc(sizeof(*i), GFP_KERNEL); |
136 | if (i) | 137 | if (i) |
137 | return &i->h; | 138 | return &i->h; |
138 | else | 139 | else |
139 | return NULL; | 140 | return NULL; |
140 | } | 141 | } |
141 | 142 | ||
142 | static void ip_map_request(struct cache_detail *cd, | 143 | static void ip_map_request(struct cache_detail *cd, |
143 | struct cache_head *h, | 144 | struct cache_head *h, |
144 | char **bpp, int *blen) | 145 | char **bpp, int *blen) |
145 | { | 146 | { |
146 | char text_addr[20]; | 147 | char text_addr[20]; |
147 | struct ip_map *im = container_of(h, struct ip_map, h); | 148 | struct ip_map *im = container_of(h, struct ip_map, h); |
148 | __be32 addr = im->m_addr.s_addr; | 149 | __be32 addr = im->m_addr.s_addr; |
149 | 150 | ||
150 | snprintf(text_addr, 20, "%u.%u.%u.%u", | 151 | snprintf(text_addr, 20, "%u.%u.%u.%u", |
151 | ntohl(addr) >> 24 & 0xff, | 152 | ntohl(addr) >> 24 & 0xff, |
152 | ntohl(addr) >> 16 & 0xff, | 153 | ntohl(addr) >> 16 & 0xff, |
153 | ntohl(addr) >> 8 & 0xff, | 154 | ntohl(addr) >> 8 & 0xff, |
154 | ntohl(addr) >> 0 & 0xff); | 155 | ntohl(addr) >> 0 & 0xff); |
155 | 156 | ||
156 | qword_add(bpp, blen, im->m_class); | 157 | qword_add(bpp, blen, im->m_class); |
157 | qword_add(bpp, blen, text_addr); | 158 | qword_add(bpp, blen, text_addr); |
158 | (*bpp)[-1] = '\n'; | 159 | (*bpp)[-1] = '\n'; |
159 | } | 160 | } |
160 | 161 | ||
161 | static struct ip_map *ip_map_lookup(char *class, struct in_addr addr); | 162 | static struct ip_map *ip_map_lookup(char *class, struct in_addr addr); |
162 | static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t expiry); | 163 | static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t expiry); |
163 | 164 | ||
164 | static int ip_map_parse(struct cache_detail *cd, | 165 | static int ip_map_parse(struct cache_detail *cd, |
165 | char *mesg, int mlen) | 166 | char *mesg, int mlen) |
166 | { | 167 | { |
167 | /* class ipaddress [domainname] */ | 168 | /* class ipaddress [domainname] */ |
168 | /* should be safe just to use the start of the input buffer | 169 | /* should be safe just to use the start of the input buffer |
169 | * for scratch: */ | 170 | * for scratch: */ |
170 | char *buf = mesg; | 171 | char *buf = mesg; |
171 | int len; | 172 | int len; |
172 | int b1,b2,b3,b4; | 173 | int b1,b2,b3,b4; |
173 | char c; | 174 | char c; |
174 | char class[8]; | 175 | char class[8]; |
175 | struct in_addr addr; | 176 | struct in_addr addr; |
176 | int err; | 177 | int err; |
177 | 178 | ||
178 | struct ip_map *ipmp; | 179 | struct ip_map *ipmp; |
179 | struct auth_domain *dom; | 180 | struct auth_domain *dom; |
180 | time_t expiry; | 181 | time_t expiry; |
181 | 182 | ||
182 | if (mesg[mlen-1] != '\n') | 183 | if (mesg[mlen-1] != '\n') |
183 | return -EINVAL; | 184 | return -EINVAL; |
184 | mesg[mlen-1] = 0; | 185 | mesg[mlen-1] = 0; |
185 | 186 | ||
186 | /* class */ | 187 | /* class */ |
187 | len = qword_get(&mesg, class, sizeof(class)); | 188 | len = qword_get(&mesg, class, sizeof(class)); |
188 | if (len <= 0) return -EINVAL; | 189 | if (len <= 0) return -EINVAL; |
189 | 190 | ||
190 | /* ip address */ | 191 | /* ip address */ |
191 | len = qword_get(&mesg, buf, mlen); | 192 | len = qword_get(&mesg, buf, mlen); |
192 | if (len <= 0) return -EINVAL; | 193 | if (len <= 0) return -EINVAL; |
193 | 194 | ||
194 | if (sscanf(buf, "%u.%u.%u.%u%c", &b1, &b2, &b3, &b4, &c) != 4) | 195 | if (sscanf(buf, "%u.%u.%u.%u%c", &b1, &b2, &b3, &b4, &c) != 4) |
195 | return -EINVAL; | 196 | return -EINVAL; |
196 | 197 | ||
197 | expiry = get_expiry(&mesg); | 198 | expiry = get_expiry(&mesg); |
198 | if (expiry ==0) | 199 | if (expiry ==0) |
199 | return -EINVAL; | 200 | return -EINVAL; |
200 | 201 | ||
201 | /* domainname, or empty for NEGATIVE */ | 202 | /* domainname, or empty for NEGATIVE */ |
202 | len = qword_get(&mesg, buf, mlen); | 203 | len = qword_get(&mesg, buf, mlen); |
203 | if (len < 0) return -EINVAL; | 204 | if (len < 0) return -EINVAL; |
204 | 205 | ||
205 | if (len) { | 206 | if (len) { |
206 | dom = unix_domain_find(buf); | 207 | dom = unix_domain_find(buf); |
207 | if (dom == NULL) | 208 | if (dom == NULL) |
208 | return -ENOENT; | 209 | return -ENOENT; |
209 | } else | 210 | } else |
210 | dom = NULL; | 211 | dom = NULL; |
211 | 212 | ||
212 | addr.s_addr = | 213 | addr.s_addr = |
213 | htonl((((((b1<<8)|b2)<<8)|b3)<<8)|b4); | 214 | htonl((((((b1<<8)|b2)<<8)|b3)<<8)|b4); |
214 | 215 | ||
215 | ipmp = ip_map_lookup(class,addr); | 216 | ipmp = ip_map_lookup(class,addr); |
216 | if (ipmp) { | 217 | if (ipmp) { |
217 | err = ip_map_update(ipmp, | 218 | err = ip_map_update(ipmp, |
218 | container_of(dom, struct unix_domain, h), | 219 | container_of(dom, struct unix_domain, h), |
219 | expiry); | 220 | expiry); |
220 | } else | 221 | } else |
221 | err = -ENOMEM; | 222 | err = -ENOMEM; |
222 | 223 | ||
223 | if (dom) | 224 | if (dom) |
224 | auth_domain_put(dom); | 225 | auth_domain_put(dom); |
225 | 226 | ||
226 | cache_flush(); | 227 | cache_flush(); |
227 | return err; | 228 | return err; |
228 | } | 229 | } |
229 | 230 | ||
230 | static int ip_map_show(struct seq_file *m, | 231 | static int ip_map_show(struct seq_file *m, |
231 | struct cache_detail *cd, | 232 | struct cache_detail *cd, |
232 | struct cache_head *h) | 233 | struct cache_head *h) |
233 | { | 234 | { |
234 | struct ip_map *im; | 235 | struct ip_map *im; |
235 | struct in_addr addr; | 236 | struct in_addr addr; |
236 | char *dom = "-no-domain-"; | 237 | char *dom = "-no-domain-"; |
237 | 238 | ||
238 | if (h == NULL) { | 239 | if (h == NULL) { |
239 | seq_puts(m, "#class IP domain\n"); | 240 | seq_puts(m, "#class IP domain\n"); |
240 | return 0; | 241 | return 0; |
241 | } | 242 | } |
242 | im = container_of(h, struct ip_map, h); | 243 | im = container_of(h, struct ip_map, h); |
243 | /* class addr domain */ | 244 | /* class addr domain */ |
244 | addr = im->m_addr; | 245 | addr = im->m_addr; |
245 | 246 | ||
246 | if (test_bit(CACHE_VALID, &h->flags) && | 247 | if (test_bit(CACHE_VALID, &h->flags) && |
247 | !test_bit(CACHE_NEGATIVE, &h->flags)) | 248 | !test_bit(CACHE_NEGATIVE, &h->flags)) |
248 | dom = im->m_client->h.name; | 249 | dom = im->m_client->h.name; |
249 | 250 | ||
250 | seq_printf(m, "%s %d.%d.%d.%d %s\n", | 251 | seq_printf(m, "%s %d.%d.%d.%d %s\n", |
251 | im->m_class, | 252 | im->m_class, |
252 | ntohl(addr.s_addr) >> 24 & 0xff, | 253 | ntohl(addr.s_addr) >> 24 & 0xff, |
253 | ntohl(addr.s_addr) >> 16 & 0xff, | 254 | ntohl(addr.s_addr) >> 16 & 0xff, |
254 | ntohl(addr.s_addr) >> 8 & 0xff, | 255 | ntohl(addr.s_addr) >> 8 & 0xff, |
255 | ntohl(addr.s_addr) >> 0 & 0xff, | 256 | ntohl(addr.s_addr) >> 0 & 0xff, |
256 | dom | 257 | dom |
257 | ); | 258 | ); |
258 | return 0; | 259 | return 0; |
259 | } | 260 | } |
260 | 261 | ||
261 | 262 | ||
262 | struct cache_detail ip_map_cache = { | 263 | struct cache_detail ip_map_cache = { |
263 | .owner = THIS_MODULE, | 264 | .owner = THIS_MODULE, |
264 | .hash_size = IP_HASHMAX, | 265 | .hash_size = IP_HASHMAX, |
265 | .hash_table = ip_table, | 266 | .hash_table = ip_table, |
266 | .name = "auth.unix.ip", | 267 | .name = "auth.unix.ip", |
267 | .cache_put = ip_map_put, | 268 | .cache_put = ip_map_put, |
268 | .cache_request = ip_map_request, | 269 | .cache_request = ip_map_request, |
269 | .cache_parse = ip_map_parse, | 270 | .cache_parse = ip_map_parse, |
270 | .cache_show = ip_map_show, | 271 | .cache_show = ip_map_show, |
271 | .match = ip_map_match, | 272 | .match = ip_map_match, |
272 | .init = ip_map_init, | 273 | .init = ip_map_init, |
273 | .update = update, | 274 | .update = update, |
274 | .alloc = ip_map_alloc, | 275 | .alloc = ip_map_alloc, |
275 | }; | 276 | }; |
276 | 277 | ||
277 | static struct ip_map *ip_map_lookup(char *class, struct in_addr addr) | 278 | static struct ip_map *ip_map_lookup(char *class, struct in_addr addr) |
278 | { | 279 | { |
279 | struct ip_map ip; | 280 | struct ip_map ip; |
280 | struct cache_head *ch; | 281 | struct cache_head *ch; |
281 | 282 | ||
282 | strcpy(ip.m_class, class); | 283 | strcpy(ip.m_class, class); |
283 | ip.m_addr = addr; | 284 | ip.m_addr = addr; |
284 | ch = sunrpc_cache_lookup(&ip_map_cache, &ip.h, | 285 | ch = sunrpc_cache_lookup(&ip_map_cache, &ip.h, |
285 | hash_str(class, IP_HASHBITS) ^ | 286 | hash_str(class, IP_HASHBITS) ^ |
286 | hash_ip((unsigned long)addr.s_addr)); | 287 | hash_ip((unsigned long)addr.s_addr)); |
287 | 288 | ||
288 | if (ch) | 289 | if (ch) |
289 | return container_of(ch, struct ip_map, h); | 290 | return container_of(ch, struct ip_map, h); |
290 | else | 291 | else |
291 | return NULL; | 292 | return NULL; |
292 | } | 293 | } |
293 | 294 | ||
294 | static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t expiry) | 295 | static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t expiry) |
295 | { | 296 | { |
296 | struct ip_map ip; | 297 | struct ip_map ip; |
297 | struct cache_head *ch; | 298 | struct cache_head *ch; |
298 | 299 | ||
299 | ip.m_client = udom; | 300 | ip.m_client = udom; |
300 | ip.h.flags = 0; | 301 | ip.h.flags = 0; |
301 | if (!udom) | 302 | if (!udom) |
302 | set_bit(CACHE_NEGATIVE, &ip.h.flags); | 303 | set_bit(CACHE_NEGATIVE, &ip.h.flags); |
303 | else { | 304 | else { |
304 | ip.m_add_change = udom->addr_changes; | 305 | ip.m_add_change = udom->addr_changes; |
305 | /* if this is from the legacy set_client system call, | 306 | /* if this is from the legacy set_client system call, |
306 | * we need m_add_change to be one higher | 307 | * we need m_add_change to be one higher |
307 | */ | 308 | */ |
308 | if (expiry == NEVER) | 309 | if (expiry == NEVER) |
309 | ip.m_add_change++; | 310 | ip.m_add_change++; |
310 | } | 311 | } |
311 | ip.h.expiry_time = expiry; | 312 | ip.h.expiry_time = expiry; |
312 | ch = sunrpc_cache_update(&ip_map_cache, | 313 | ch = sunrpc_cache_update(&ip_map_cache, |
313 | &ip.h, &ipm->h, | 314 | &ip.h, &ipm->h, |
314 | hash_str(ipm->m_class, IP_HASHBITS) ^ | 315 | hash_str(ipm->m_class, IP_HASHBITS) ^ |
315 | hash_ip((unsigned long)ipm->m_addr.s_addr)); | 316 | hash_ip((unsigned long)ipm->m_addr.s_addr)); |
316 | if (!ch) | 317 | if (!ch) |
317 | return -ENOMEM; | 318 | return -ENOMEM; |
318 | cache_put(ch, &ip_map_cache); | 319 | cache_put(ch, &ip_map_cache); |
319 | return 0; | 320 | return 0; |
320 | } | 321 | } |
321 | 322 | ||
322 | int auth_unix_add_addr(struct in_addr addr, struct auth_domain *dom) | 323 | int auth_unix_add_addr(struct in_addr addr, struct auth_domain *dom) |
323 | { | 324 | { |
324 | struct unix_domain *udom; | 325 | struct unix_domain *udom; |
325 | struct ip_map *ipmp; | 326 | struct ip_map *ipmp; |
326 | 327 | ||
327 | if (dom->flavour != &svcauth_unix) | 328 | if (dom->flavour != &svcauth_unix) |
328 | return -EINVAL; | 329 | return -EINVAL; |
329 | udom = container_of(dom, struct unix_domain, h); | 330 | udom = container_of(dom, struct unix_domain, h); |
330 | ipmp = ip_map_lookup("nfsd", addr); | 331 | ipmp = ip_map_lookup("nfsd", addr); |
331 | 332 | ||
332 | if (ipmp) | 333 | if (ipmp) |
333 | return ip_map_update(ipmp, udom, NEVER); | 334 | return ip_map_update(ipmp, udom, NEVER); |
334 | else | 335 | else |
335 | return -ENOMEM; | 336 | return -ENOMEM; |
336 | } | 337 | } |
337 | 338 | ||
338 | int auth_unix_forget_old(struct auth_domain *dom) | 339 | int auth_unix_forget_old(struct auth_domain *dom) |
339 | { | 340 | { |
340 | struct unix_domain *udom; | 341 | struct unix_domain *udom; |
341 | 342 | ||
342 | if (dom->flavour != &svcauth_unix) | 343 | if (dom->flavour != &svcauth_unix) |
343 | return -EINVAL; | 344 | return -EINVAL; |
344 | udom = container_of(dom, struct unix_domain, h); | 345 | udom = container_of(dom, struct unix_domain, h); |
345 | udom->addr_changes++; | 346 | udom->addr_changes++; |
346 | return 0; | 347 | return 0; |
347 | } | 348 | } |
348 | 349 | ||
349 | struct auth_domain *auth_unix_lookup(struct in_addr addr) | 350 | struct auth_domain *auth_unix_lookup(struct in_addr addr) |
350 | { | 351 | { |
351 | struct ip_map *ipm; | 352 | struct ip_map *ipm; |
352 | struct auth_domain *rv; | 353 | struct auth_domain *rv; |
353 | 354 | ||
354 | ipm = ip_map_lookup("nfsd", addr); | 355 | ipm = ip_map_lookup("nfsd", addr); |
355 | 356 | ||
356 | if (!ipm) | 357 | if (!ipm) |
357 | return NULL; | 358 | return NULL; |
358 | if (cache_check(&ip_map_cache, &ipm->h, NULL)) | 359 | if (cache_check(&ip_map_cache, &ipm->h, NULL)) |
359 | return NULL; | 360 | return NULL; |
360 | 361 | ||
361 | if ((ipm->m_client->addr_changes - ipm->m_add_change) >0) { | 362 | if ((ipm->m_client->addr_changes - ipm->m_add_change) >0) { |
362 | if (test_and_set_bit(CACHE_NEGATIVE, &ipm->h.flags) == 0) | 363 | if (test_and_set_bit(CACHE_NEGATIVE, &ipm->h.flags) == 0) |
363 | auth_domain_put(&ipm->m_client->h); | 364 | auth_domain_put(&ipm->m_client->h); |
364 | rv = NULL; | 365 | rv = NULL; |
365 | } else { | 366 | } else { |
366 | rv = &ipm->m_client->h; | 367 | rv = &ipm->m_client->h; |
367 | kref_get(&rv->ref); | 368 | kref_get(&rv->ref); |
368 | } | 369 | } |
369 | cache_put(&ipm->h, &ip_map_cache); | 370 | cache_put(&ipm->h, &ip_map_cache); |
370 | return rv; | 371 | return rv; |
371 | } | 372 | } |
372 | 373 | ||
373 | void svcauth_unix_purge(void) | 374 | void svcauth_unix_purge(void) |
374 | { | 375 | { |
375 | cache_purge(&ip_map_cache); | 376 | cache_purge(&ip_map_cache); |
376 | } | 377 | } |
377 | 378 | ||
379 | static inline struct ip_map * | ||
380 | ip_map_cached_get(struct svc_rqst *rqstp) | ||
381 | { | ||
382 | struct ip_map *ipm = rqstp->rq_sock->sk_info_authunix; | ||
383 | if (ipm != NULL) { | ||
384 | if (!cache_valid(&ipm->h)) { | ||
385 | /* | ||
386 | * The entry has been invalidated since it was | ||
387 | * remembered, e.g. by a second mount from the | ||
388 | * same IP address. | ||
389 | */ | ||
390 | rqstp->rq_sock->sk_info_authunix = NULL; | ||
391 | cache_put(&ipm->h, &ip_map_cache); | ||
392 | return NULL; | ||
393 | } | ||
394 | cache_get(&ipm->h); | ||
395 | } | ||
396 | return ipm; | ||
397 | } | ||
398 | |||
399 | static inline void | ||
400 | ip_map_cached_put(struct svc_rqst *rqstp, struct ip_map *ipm) | ||
401 | { | ||
402 | struct svc_sock *svsk = rqstp->rq_sock; | ||
403 | |||
404 | if (svsk->sk_sock->type == SOCK_STREAM && svsk->sk_info_authunix == NULL) | ||
405 | svsk->sk_info_authunix = ipm; /* newly cached, keep the reference */ | ||
406 | else | ||
407 | cache_put(&ipm->h, &ip_map_cache); | ||
408 | } | ||
409 | |||
410 | void | ||
411 | svcauth_unix_info_release(void *info) | ||
412 | { | ||
413 | struct ip_map *ipm = info; | ||
414 | cache_put(&ipm->h, &ip_map_cache); | ||
415 | } | ||
416 | |||
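The three routines added above are the heart of this patch: ip_map_cached_get revalidates the ip_map remembered on the socket and takes a reference for the caller, while ip_map_cached_put either donates the caller's reference to a stream socket that has nothing cached yet or simply drops it. Below is a minimal userspace model of that reference-donation protocol; the struct names and the plain integer refcount are illustrative stand-ins for the kernel's cache_head machinery, not the real API.

    #include <stdbool.h>
    #include <stddef.h>

    struct entry { int refs; bool valid; };              /* stands in for struct ip_map */
    struct conn  { struct entry *cached; bool stream; }; /* stands in for struct svc_sock */

    static struct entry *cached_get(struct conn *c)
    {
        struct entry *e = c->cached;
        if (e != NULL) {
            if (!e->valid) {        /* invalidated since it was remembered */
                c->cached = NULL;
                e->refs--;          /* drop the socket's reference */
                return NULL;        /* caller falls back to the hash lookup */
            }
            e->refs++;              /* take a reference for the caller */
        }
        return e;
    }

    static void cached_put(struct conn *c, struct entry *e)
    {
        if (c->stream && c->cached == NULL)
            c->cached = e;          /* newly cached: the socket keeps the ref */
        else
            e->refs--;              /* otherwise just release it */
    }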
378 | static int | 417 | static int |
379 | svcauth_unix_set_client(struct svc_rqst *rqstp) | 418 | svcauth_unix_set_client(struct svc_rqst *rqstp) |
380 | { | 419 | { |
381 | struct ip_map *ipm; | 420 | struct ip_map *ipm; |
382 | 421 | ||
383 | rqstp->rq_client = NULL; | 422 | rqstp->rq_client = NULL; |
384 | if (rqstp->rq_proc == 0) | 423 | if (rqstp->rq_proc == 0) |
385 | return SVC_OK; | 424 | return SVC_OK; |
386 | 425 | ||
387 | ipm = ip_map_lookup(rqstp->rq_server->sv_program->pg_class, | 426 | ipm = ip_map_cached_get(rqstp); |
388 | rqstp->rq_addr.sin_addr); | 427 | if (ipm == NULL) |
428 | ipm = ip_map_lookup(rqstp->rq_server->sv_program->pg_class, | ||
429 | rqstp->rq_addr.sin_addr); | ||
389 | 430 | ||
390 | if (ipm == NULL) | 431 | if (ipm == NULL) |
391 | return SVC_DENIED; | 432 | return SVC_DENIED; |
392 | 433 | ||
393 | switch (cache_check(&ip_map_cache, &ipm->h, &rqstp->rq_chandle)) { | 434 | switch (cache_check(&ip_map_cache, &ipm->h, &rqstp->rq_chandle)) { |
394 | default: | 435 | default: |
395 | BUG(); | 436 | BUG(); |
396 | case -EAGAIN: | 437 | case -EAGAIN: |
397 | return SVC_DROP; | 438 | return SVC_DROP; |
398 | case -ENOENT: | 439 | case -ENOENT: |
399 | return SVC_DENIED; | 440 | return SVC_DENIED; |
400 | case 0: | 441 | case 0: |
401 | rqstp->rq_client = &ipm->m_client->h; | 442 | rqstp->rq_client = &ipm->m_client->h; |
402 | kref_get(&rqstp->rq_client->ref); | 443 | kref_get(&rqstp->rq_client->ref); |
403 | cache_put(&ipm->h, &ip_map_cache); | 444 | ip_map_cached_put(rqstp, ipm); |
404 | break; | 445 | break; |
405 | } | 446 | } |
406 | return SVC_OK; | 447 | return SVC_OK; |
407 | } | 448 | } |
408 | 449 | ||
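Continuing the model above (same caveat: illustrative names only), this is the shape of two consecutive calls on one TCP connection; the second call never touches the hash table, which is exactly the strcmp()/ip_map_lookup() cost the commit message measures.

    #include <stdio.h>

    int main(void)
    {
        struct entry e = { .refs = 1, .valid = true }; /* as returned by a table lookup */
        struct conn  c = { .cached = NULL, .stream = true };

        /* call 1: cache miss; after the table lookup, the put donates the ref */
        cached_put(&c, &e);                 /* c.cached = &e, refs stays 1 */

        /* call 2: cache hit; no hashing, no strcmp */
        struct entry *hit = cached_get(&c); /* refs -> 2 */
        printf("hit=%p refs=%d\n", (void *)hit, e.refs);
        cached_put(&c, hit);                /* already cached: refs -> 1 */
        return 0;
    }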
409 | static int | 450 | static int |
410 | svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp) | 451 | svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp) |
411 | { | 452 | { |
412 | struct kvec *argv = &rqstp->rq_arg.head[0]; | 453 | struct kvec *argv = &rqstp->rq_arg.head[0]; |
413 | struct kvec *resv = &rqstp->rq_res.head[0]; | 454 | struct kvec *resv = &rqstp->rq_res.head[0]; |
414 | struct svc_cred *cred = &rqstp->rq_cred; | 455 | struct svc_cred *cred = &rqstp->rq_cred; |
415 | 456 | ||
416 | cred->cr_group_info = NULL; | 457 | cred->cr_group_info = NULL; |
417 | rqstp->rq_client = NULL; | 458 | rqstp->rq_client = NULL; |
418 | 459 | ||
419 | if (argv->iov_len < 3*4) | 460 | if (argv->iov_len < 3*4) |
420 | return SVC_GARBAGE; | 461 | return SVC_GARBAGE; |
421 | 462 | ||
422 | if (svc_getu32(argv) != 0) { | 463 | if (svc_getu32(argv) != 0) { |
423 | dprintk("svc: bad null cred\n"); | 464 | dprintk("svc: bad null cred\n"); |
424 | *authp = rpc_autherr_badcred; | 465 | *authp = rpc_autherr_badcred; |
425 | return SVC_DENIED; | 466 | return SVC_DENIED; |
426 | } | 467 | } |
427 | if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) { | 468 | if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) { |
428 | dprintk("svc: bad null verf\n"); | 469 | dprintk("svc: bad null verf\n"); |
429 | *authp = rpc_autherr_badverf; | 470 | *authp = rpc_autherr_badverf; |
430 | return SVC_DENIED; | 471 | return SVC_DENIED; |
431 | } | 472 | } |
432 | 473 | ||
433 | /* Signal that mapping to nobody uid/gid is required */ | 474 | /* Signal that mapping to nobody uid/gid is required */ |
434 | cred->cr_uid = (uid_t) -1; | 475 | cred->cr_uid = (uid_t) -1; |
435 | cred->cr_gid = (gid_t) -1; | 476 | cred->cr_gid = (gid_t) -1; |
436 | cred->cr_group_info = groups_alloc(0); | 477 | cred->cr_group_info = groups_alloc(0); |
437 | if (cred->cr_group_info == NULL) | 478 | if (cred->cr_group_info == NULL) |
438 | return SVC_DROP; /* kmalloc failure - client must retry */ | 479 | return SVC_DROP; /* kmalloc failure - client must retry */ |
439 | 480 | ||
440 | /* Put NULL verifier */ | 481 | /* Put NULL verifier */ |
441 | svc_putnl(resv, RPC_AUTH_NULL); | 482 | svc_putnl(resv, RPC_AUTH_NULL); |
442 | svc_putnl(resv, 0); | 483 | svc_putnl(resv, 0); |
443 | 484 | ||
444 | return SVC_OK; | 485 | return SVC_OK; |
445 | } | 486 | } |
446 | 487 | ||
447 | static int | 488 | static int |
448 | svcauth_null_release(struct svc_rqst *rqstp) | 489 | svcauth_null_release(struct svc_rqst *rqstp) |
449 | { | 490 | { |
450 | if (rqstp->rq_client) | 491 | if (rqstp->rq_client) |
451 | auth_domain_put(rqstp->rq_client); | 492 | auth_domain_put(rqstp->rq_client); |
452 | rqstp->rq_client = NULL; | 493 | rqstp->rq_client = NULL; |
453 | if (rqstp->rq_cred.cr_group_info) | 494 | if (rqstp->rq_cred.cr_group_info) |
454 | put_group_info(rqstp->rq_cred.cr_group_info); | 495 | put_group_info(rqstp->rq_cred.cr_group_info); |
455 | rqstp->rq_cred.cr_group_info = NULL; | 496 | rqstp->rq_cred.cr_group_info = NULL; |
456 | 497 | ||
457 | return 0; /* don't drop */ | 498 | return 0; /* don't drop */ |
458 | } | 499 | } |
459 | 500 | ||
460 | 501 | ||
461 | struct auth_ops svcauth_null = { | 502 | struct auth_ops svcauth_null = { |
462 | .name = "null", | 503 | .name = "null", |
463 | .owner = THIS_MODULE, | 504 | .owner = THIS_MODULE, |
464 | .flavour = RPC_AUTH_NULL, | 505 | .flavour = RPC_AUTH_NULL, |
465 | .accept = svcauth_null_accept, | 506 | .accept = svcauth_null_accept, |
466 | .release = svcauth_null_release, | 507 | .release = svcauth_null_release, |
467 | .set_client = svcauth_unix_set_client, | 508 | .set_client = svcauth_unix_set_client, |
468 | }; | 509 | }; |
469 | 510 | ||
470 | 511 | ||
471 | static int | 512 | static int |
472 | svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp) | 513 | svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp) |
473 | { | 514 | { |
474 | struct kvec *argv = &rqstp->rq_arg.head[0]; | 515 | struct kvec *argv = &rqstp->rq_arg.head[0]; |
475 | struct kvec *resv = &rqstp->rq_res.head[0]; | 516 | struct kvec *resv = &rqstp->rq_res.head[0]; |
476 | struct svc_cred *cred = &rqstp->rq_cred; | 517 | struct svc_cred *cred = &rqstp->rq_cred; |
477 | u32 slen, i; | 518 | u32 slen, i; |
478 | int len = argv->iov_len; | 519 | int len = argv->iov_len; |
479 | 520 | ||
480 | cred->cr_group_info = NULL; | 521 | cred->cr_group_info = NULL; |
481 | rqstp->rq_client = NULL; | 522 | rqstp->rq_client = NULL; |
482 | 523 | ||
483 | if ((len -= 3*4) < 0) | 524 | if ((len -= 3*4) < 0) |
484 | return SVC_GARBAGE; | 525 | return SVC_GARBAGE; |
485 | 526 | ||
486 | svc_getu32(argv); /* length */ | 527 | svc_getu32(argv); /* length */ |
487 | svc_getu32(argv); /* time stamp */ | 528 | svc_getu32(argv); /* time stamp */ |
488 | slen = XDR_QUADLEN(svc_getnl(argv)); /* machname length */ | 529 | slen = XDR_QUADLEN(svc_getnl(argv)); /* machname length */ |
489 | if (slen > 64 || (len -= (slen + 3)*4) < 0) | 530 | if (slen > 64 || (len -= (slen + 3)*4) < 0) |
490 | goto badcred; | 531 | goto badcred; |
491 | argv->iov_base = (void*)((__be32*)argv->iov_base + slen); /* skip machname */ | 532 | argv->iov_base = (void*)((__be32*)argv->iov_base + slen); /* skip machname */ |
492 | argv->iov_len -= slen*4; | 533 | argv->iov_len -= slen*4; |
493 | 534 | ||
494 | cred->cr_uid = svc_getnl(argv); /* uid */ | 535 | cred->cr_uid = svc_getnl(argv); /* uid */ |
495 | cred->cr_gid = svc_getnl(argv); /* gid */ | 536 | cred->cr_gid = svc_getnl(argv); /* gid */ |
496 | slen = svc_getnl(argv); /* gids length */ | 537 | slen = svc_getnl(argv); /* gids length */ |
497 | if (slen > 16 || (len -= (slen + 2)*4) < 0) | 538 | if (slen > 16 || (len -= (slen + 2)*4) < 0) |
498 | goto badcred; | 539 | goto badcred; |
499 | cred->cr_group_info = groups_alloc(slen); | 540 | cred->cr_group_info = groups_alloc(slen); |
500 | if (cred->cr_group_info == NULL) | 541 | if (cred->cr_group_info == NULL) |
501 | return SVC_DROP; | 542 | return SVC_DROP; |
502 | for (i = 0; i < slen; i++) | 543 | for (i = 0; i < slen; i++) |
503 | GROUP_AT(cred->cr_group_info, i) = svc_getnl(argv); | 544 | GROUP_AT(cred->cr_group_info, i) = svc_getnl(argv); |
504 | 545 | ||
505 | if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) { | 546 | if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) { |
506 | *authp = rpc_autherr_badverf; | 547 | *authp = rpc_autherr_badverf; |
507 | return SVC_DENIED; | 548 | return SVC_DENIED; |
508 | } | 549 | } |
509 | 550 | ||
510 | /* Put NULL verifier */ | 551 | /* Put NULL verifier */ |
511 | svc_putnl(resv, RPC_AUTH_NULL); | 552 | svc_putnl(resv, RPC_AUTH_NULL); |
512 | svc_putnl(resv, 0); | 553 | svc_putnl(resv, 0); |
513 | 554 | ||
514 | return SVC_OK; | 555 | return SVC_OK; |
515 | 556 | ||
516 | badcred: | 557 | badcred: |
517 | *authp = rpc_autherr_badcred; | 558 | *authp = rpc_autherr_badcred; |
518 | return SVC_DENIED; | 559 | return SVC_DENIED; |
519 | } | 560 | } |
520 | 561 | ||
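svcauth_unix_accept above walks the AUTH_UNIX (AUTH_SYS) credential body from RFC 1831: a stamp, a machine name (capped here at 64 XDR words, i.e. 256 padded bytes), uid, gid, and up to 16 supplementary gids, followed by a NULL verifier. A minimal userspace sketch of the same walk, assuming p points at the stamp word of a body whose opaque length has already been checked:

    #include <stdint.h>
    #include <stddef.h>
    #include <arpa/inet.h>                      /* ntohl() */

    struct cred { uint32_t uid, gid, ngids, gids[16]; };

    /* Parse an AUTH_UNIX credential body of 'words' 32-bit XDR words.
     * Returns 0 on success, -1 on a bad credential. */
    static int parse_authunix(const uint32_t *p, size_t words, struct cred *c)
    {
        if (words < 2)
            return -1;
        p++; words--;                           /* stamp: ignored */
        uint32_t mlen = (ntohl(*p++) + 3) / 4;  /* machine name, in XDR words */
        words--;
        if (mlen > 64 || words < mlen + 3)
            return -1;
        p += mlen; words -= mlen;               /* skip the (padded) machine name */
        c->uid   = ntohl(*p++);
        c->gid   = ntohl(*p++);
        c->ngids = ntohl(*p++);
        words -= 3;
        if (c->ngids > 16 || words < c->ngids)
            return -1;
        for (uint32_t i = 0; i < c->ngids; i++)
            c->gids[i] = ntohl(*p++);
        return 0;
    }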
521 | static int | 562 | static int |
522 | svcauth_unix_release(struct svc_rqst *rqstp) | 563 | svcauth_unix_release(struct svc_rqst *rqstp) |
523 | { | 564 | { |
524 | /* Verifier (such as it is) is already in place. | 565 | /* Verifier (such as it is) is already in place. |
525 | */ | 566 | */ |
526 | if (rqstp->rq_client) | 567 | if (rqstp->rq_client) |
527 | auth_domain_put(rqstp->rq_client); | 568 | auth_domain_put(rqstp->rq_client); |
528 | rqstp->rq_client = NULL; | 569 | rqstp->rq_client = NULL; |
529 | if (rqstp->rq_cred.cr_group_info) | 570 | if (rqstp->rq_cred.cr_group_info) |
530 | put_group_info(rqstp->rq_cred.cr_group_info); | 571 | put_group_info(rqstp->rq_cred.cr_group_info); |
531 | rqstp->rq_cred.cr_group_info = NULL; | 572 | rqstp->rq_cred.cr_group_info = NULL; |
532 | 573 | ||
533 | return 0; | 574 | return 0; |
534 | } | 575 | } |
535 | 576 | ||
536 | 577 | ||
537 | struct auth_ops svcauth_unix = { | 578 | struct auth_ops svcauth_unix = { |
538 | .name = "unix", | 579 | .name = "unix", |
539 | .owner = THIS_MODULE, | 580 | .owner = THIS_MODULE, |
540 | .flavour = RPC_AUTH_UNIX, | 581 | .flavour = RPC_AUTH_UNIX, |
541 | .accept = svcauth_unix_accept, | 582 | .accept = svcauth_unix_accept, |
542 | .release = svcauth_unix_release, | 583 | .release = svcauth_unix_release, |
543 | .domain_release = svcauth_unix_domain_release, | 584 | .domain_release = svcauth_unix_domain_release, |
544 | .set_client = svcauth_unix_set_client, | 585 | .set_client = svcauth_unix_set_client, |
545 | }; | 586 | }; |
546 | 587 | ||
547 | 588 |
net/sunrpc/svcsock.c
1 | /* | 1 | /* |
2 | * linux/net/sunrpc/svcsock.c | 2 | * linux/net/sunrpc/svcsock.c |
3 | * | 3 | * |
4 | * These are the RPC server socket internals. | 4 | * These are the RPC server socket internals. |
5 | * | 5 | * |
6 | * The server scheduling algorithm does not always distribute the load | 6 | * The server scheduling algorithm does not always distribute the load |
7 | * evenly when servicing a single client. May need to modify the | 7 | * evenly when servicing a single client. May need to modify the |
8 | * svc_sock_enqueue procedure... | 8 | * svc_sock_enqueue procedure... |
9 | * | 9 | * |
10 | * TCP support is largely untested and may be a little slow. The problem | 10 | * TCP support is largely untested and may be a little slow. The problem |
11 | * is that we currently do two separate recvfrom's, one for the 4-byte | 11 | * is that we currently do two separate recvfrom's, one for the 4-byte |
12 | * record length, and the second for the actual record. This could possibly | 12 | * record length, and the second for the actual record. This could possibly |
13 | * be improved by always reading a minimum size of around 100 bytes and | 13 | * be improved by always reading a minimum size of around 100 bytes and |
14 | * tucking any superfluous bytes away in a temporary store. Still, that | 14 | * tucking any superfluous bytes away in a temporary store. Still, that |
15 | * leaves write requests out in the rain. An alternative may be to peek at | 15 | * leaves write requests out in the rain. An alternative may be to peek at |
16 | * the first skb in the queue, and if it matches the next TCP sequence | 16 | * the first skb in the queue, and if it matches the next TCP sequence |
17 | * number, to extract the record marker. Yuck. | 17 | * number, to extract the record marker. Yuck. |
18 | * | 18 | * |
19 | * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> | 19 | * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> |
20 | */ | 20 | */ |
21 | 21 | ||
22 | #include <linux/sched.h> | 22 | #include <linux/sched.h> |
23 | #include <linux/errno.h> | 23 | #include <linux/errno.h> |
24 | #include <linux/fcntl.h> | 24 | #include <linux/fcntl.h> |
25 | #include <linux/net.h> | 25 | #include <linux/net.h> |
26 | #include <linux/in.h> | 26 | #include <linux/in.h> |
27 | #include <linux/inet.h> | 27 | #include <linux/inet.h> |
28 | #include <linux/udp.h> | 28 | #include <linux/udp.h> |
29 | #include <linux/tcp.h> | 29 | #include <linux/tcp.h> |
30 | #include <linux/unistd.h> | 30 | #include <linux/unistd.h> |
31 | #include <linux/slab.h> | 31 | #include <linux/slab.h> |
32 | #include <linux/netdevice.h> | 32 | #include <linux/netdevice.h> |
33 | #include <linux/skbuff.h> | 33 | #include <linux/skbuff.h> |
34 | #include <linux/file.h> | 34 | #include <linux/file.h> |
35 | #include <net/sock.h> | 35 | #include <net/sock.h> |
36 | #include <net/checksum.h> | 36 | #include <net/checksum.h> |
37 | #include <net/ip.h> | 37 | #include <net/ip.h> |
38 | #include <net/tcp_states.h> | 38 | #include <net/tcp_states.h> |
39 | #include <asm/uaccess.h> | 39 | #include <asm/uaccess.h> |
40 | #include <asm/ioctls.h> | 40 | #include <asm/ioctls.h> |
41 | 41 | ||
42 | #include <linux/sunrpc/types.h> | 42 | #include <linux/sunrpc/types.h> |
43 | #include <linux/sunrpc/xdr.h> | 43 | #include <linux/sunrpc/xdr.h> |
44 | #include <linux/sunrpc/svcsock.h> | 44 | #include <linux/sunrpc/svcsock.h> |
45 | #include <linux/sunrpc/stats.h> | 45 | #include <linux/sunrpc/stats.h> |
46 | 46 | ||
47 | /* SMP locking strategy: | 47 | /* SMP locking strategy: |
48 | * | 48 | * |
49 | * svc_pool->sp_lock protects most of the fields of that pool. | 49 | * svc_pool->sp_lock protects most of the fields of that pool. |
50 | * svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt. | 50 | * svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt. |
51 | * when both need to be taken (rare), svc_serv->sv_lock is first. | 51 | * when both need to be taken (rare), svc_serv->sv_lock is first. |
52 | * BKL protects svc_serv->sv_nrthread. | 52 | * BKL protects svc_serv->sv_nrthread. |
53 | * svc_sock->sk_defer_lock protects the svc_sock->sk_deferred list | 53 | * svc_sock->sk_defer_lock protects the svc_sock->sk_deferred list |
54 | * svc_sock->sk_flags.SK_BUSY prevents a svc_sock from being enqueued more than once. | 54 | * svc_sock->sk_flags.SK_BUSY prevents a svc_sock from being enqueued more than once. |
55 | * | 55 | * |
56 | * Some flags can be set to certain values at any time | 56 | * Some flags can be set to certain values at any time |
57 | * providing that certain rules are followed: | 57 | * providing that certain rules are followed: |
58 | * | 58 | * |
59 | * SK_CONN and SK_DATA can be set or cleared at any time. | 59 | * SK_CONN and SK_DATA can be set or cleared at any time. |
60 | * after a set, svc_sock_enqueue must be called. | 60 | * after a set, svc_sock_enqueue must be called. |
61 | * after a clear, the socket must be read/accepted | 61 | * after a clear, the socket must be read/accepted |
62 | * if this succeeds, it must be set again. | 62 | * if this succeeds, it must be set again. |
63 | * SK_CLOSE can be set at any time. It is never cleared. | 63 | * SK_CLOSE can be set at any time. It is never cleared. |
64 | * | 64 | * |
65 | */ | 65 | */ |
66 | 66 | ||
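The SK_CONN/SK_DATA rules spelled out in the comment above are a lost-wakeup guard: the producer always sets the bit and then enqueues, and the consumer clears the bit before reading and sets it again after a successful read, so data that races with the clear is always rediscovered on the next pass. A sketch of that discipline, with C11 atomics as illustrative stand-ins for set_bit/clear_bit:

    #include <stdatomic.h>
    #include <stdbool.h>

    static atomic_bool sk_data;

    static void data_ready(void)            /* network callback (producer) */
    {
        atomic_store(&sk_data, true);
        /* svc_sock_enqueue() must follow every set */
    }

    static bool service_one(void)           /* server thread (consumer) */
    {
        atomic_store(&sk_data, false);      /* clear before the read attempt */
        bool got = false;                   /* ... read/accept would go here ... */
        if (got)
            atomic_store(&sk_data, true);   /* success: set again, re-enqueue */
        return got;
    }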
67 | #define RPCDBG_FACILITY RPCDBG_SVCSOCK | 67 | #define RPCDBG_FACILITY RPCDBG_SVCSOCK |
68 | 68 | ||
69 | 69 | ||
70 | static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *, | 70 | static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *, |
71 | int *errp, int pmap_reg); | 71 | int *errp, int pmap_reg); |
72 | static void svc_udp_data_ready(struct sock *, int); | 72 | static void svc_udp_data_ready(struct sock *, int); |
73 | static int svc_udp_recvfrom(struct svc_rqst *); | 73 | static int svc_udp_recvfrom(struct svc_rqst *); |
74 | static int svc_udp_sendto(struct svc_rqst *); | 74 | static int svc_udp_sendto(struct svc_rqst *); |
75 | 75 | ||
76 | static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk); | 76 | static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk); |
77 | static int svc_deferred_recv(struct svc_rqst *rqstp); | 77 | static int svc_deferred_recv(struct svc_rqst *rqstp); |
78 | static struct cache_deferred_req *svc_defer(struct cache_req *req); | 78 | static struct cache_deferred_req *svc_defer(struct cache_req *req); |
79 | 79 | ||
80 | /* apparently the "standard" is that clients close | 80 | /* apparently the "standard" is that clients close |
81 | * idle connections after 5 minutes, servers after | 81 | * idle connections after 5 minutes, servers after |
82 | * 6 minutes | 82 | * 6 minutes |
83 | * http://www.connectathon.org/talks96/nfstcp.pdf | 83 | * http://www.connectathon.org/talks96/nfstcp.pdf |
84 | */ | 84 | */ |
85 | static int svc_conn_age_period = 6*60; | 85 | static int svc_conn_age_period = 6*60; |
86 | 86 | ||
87 | /* | 87 | /* |
88 | * Queue up an idle server thread. Must have pool->sp_lock held. | 88 | * Queue up an idle server thread. Must have pool->sp_lock held. |
89 | * Note: this is really a stack rather than a queue, so that we only | 89 | * Note: this is really a stack rather than a queue, so that we only |
90 | * use as many different threads as we need, and the rest don't pollute | 90 | * use as many different threads as we need, and the rest don't pollute |
91 | * the cache. | 91 | * the cache. |
92 | */ | 92 | */ |
93 | static inline void | 93 | static inline void |
94 | svc_thread_enqueue(struct svc_pool *pool, struct svc_rqst *rqstp) | 94 | svc_thread_enqueue(struct svc_pool *pool, struct svc_rqst *rqstp) |
95 | { | 95 | { |
96 | list_add(&rqstp->rq_list, &pool->sp_threads); | 96 | list_add(&rqstp->rq_list, &pool->sp_threads); |
97 | } | 97 | } |
98 | 98 | ||
99 | /* | 99 | /* |
100 | * Dequeue an nfsd thread. Must have pool->sp_lock held. | 100 | * Dequeue an nfsd thread. Must have pool->sp_lock held. |
101 | */ | 101 | */ |
102 | static inline void | 102 | static inline void |
103 | svc_thread_dequeue(struct svc_pool *pool, struct svc_rqst *rqstp) | 103 | svc_thread_dequeue(struct svc_pool *pool, struct svc_rqst *rqstp) |
104 | { | 104 | { |
105 | list_del(&rqstp->rq_list); | 105 | list_del(&rqstp->rq_list); |
106 | } | 106 | } |
107 | 107 | ||
108 | /* | 108 | /* |
109 | * Release an skbuff after use | 109 | * Release an skbuff after use |
110 | */ | 110 | */ |
111 | static inline void | 111 | static inline void |
112 | svc_release_skb(struct svc_rqst *rqstp) | 112 | svc_release_skb(struct svc_rqst *rqstp) |
113 | { | 113 | { |
114 | struct sk_buff *skb = rqstp->rq_skbuff; | 114 | struct sk_buff *skb = rqstp->rq_skbuff; |
115 | struct svc_deferred_req *dr = rqstp->rq_deferred; | 115 | struct svc_deferred_req *dr = rqstp->rq_deferred; |
116 | 116 | ||
117 | if (skb) { | 117 | if (skb) { |
118 | rqstp->rq_skbuff = NULL; | 118 | rqstp->rq_skbuff = NULL; |
119 | 119 | ||
120 | dprintk("svc: service %p, releasing skb %p\n", rqstp, skb); | 120 | dprintk("svc: service %p, releasing skb %p\n", rqstp, skb); |
121 | skb_free_datagram(rqstp->rq_sock->sk_sk, skb); | 121 | skb_free_datagram(rqstp->rq_sock->sk_sk, skb); |
122 | } | 122 | } |
123 | if (dr) { | 123 | if (dr) { |
124 | rqstp->rq_deferred = NULL; | 124 | rqstp->rq_deferred = NULL; |
125 | kfree(dr); | 125 | kfree(dr); |
126 | } | 126 | } |
127 | } | 127 | } |
128 | 128 | ||
129 | /* | 129 | /* |
130 | * Any space to write? | 130 | * Any space to write? |
131 | */ | 131 | */ |
132 | static inline unsigned long | 132 | static inline unsigned long |
133 | svc_sock_wspace(struct svc_sock *svsk) | 133 | svc_sock_wspace(struct svc_sock *svsk) |
134 | { | 134 | { |
135 | int wspace; | 135 | int wspace; |
136 | 136 | ||
137 | if (svsk->sk_sock->type == SOCK_STREAM) | 137 | if (svsk->sk_sock->type == SOCK_STREAM) |
138 | wspace = sk_stream_wspace(svsk->sk_sk); | 138 | wspace = sk_stream_wspace(svsk->sk_sk); |
139 | else | 139 | else |
140 | wspace = sock_wspace(svsk->sk_sk); | 140 | wspace = sock_wspace(svsk->sk_sk); |
141 | 141 | ||
142 | return wspace; | 142 | return wspace; |
143 | } | 143 | } |
144 | 144 | ||
145 | /* | 145 | /* |
146 | * Queue up a socket with data pending. If there are idle nfsd | 146 | * Queue up a socket with data pending. If there are idle nfsd |
147 | * processes, wake 'em up. | 147 | * processes, wake 'em up. |
148 | * | 148 | * |
149 | */ | 149 | */ |
150 | static void | 150 | static void |
151 | svc_sock_enqueue(struct svc_sock *svsk) | 151 | svc_sock_enqueue(struct svc_sock *svsk) |
152 | { | 152 | { |
153 | struct svc_serv *serv = svsk->sk_server; | 153 | struct svc_serv *serv = svsk->sk_server; |
154 | struct svc_pool *pool; | 154 | struct svc_pool *pool; |
155 | struct svc_rqst *rqstp; | 155 | struct svc_rqst *rqstp; |
156 | int cpu; | 156 | int cpu; |
157 | 157 | ||
158 | if (!(svsk->sk_flags & | 158 | if (!(svsk->sk_flags & |
159 | ( (1<<SK_CONN)|(1<<SK_DATA)|(1<<SK_CLOSE)|(1<<SK_DEFERRED)) )) | 159 | ( (1<<SK_CONN)|(1<<SK_DATA)|(1<<SK_CLOSE)|(1<<SK_DEFERRED)) )) |
160 | return; | 160 | return; |
161 | if (test_bit(SK_DEAD, &svsk->sk_flags)) | 161 | if (test_bit(SK_DEAD, &svsk->sk_flags)) |
162 | return; | 162 | return; |
163 | 163 | ||
164 | cpu = get_cpu(); | 164 | cpu = get_cpu(); |
165 | pool = svc_pool_for_cpu(svsk->sk_server, cpu); | 165 | pool = svc_pool_for_cpu(svsk->sk_server, cpu); |
166 | put_cpu(); | 166 | put_cpu(); |
167 | 167 | ||
168 | spin_lock_bh(&pool->sp_lock); | 168 | spin_lock_bh(&pool->sp_lock); |
169 | 169 | ||
170 | if (!list_empty(&pool->sp_threads) && | 170 | if (!list_empty(&pool->sp_threads) && |
171 | !list_empty(&pool->sp_sockets)) | 171 | !list_empty(&pool->sp_sockets)) |
172 | printk(KERN_ERR | 172 | printk(KERN_ERR |
173 | "svc_sock_enqueue: threads and sockets both waiting??\n"); | 173 | "svc_sock_enqueue: threads and sockets both waiting??\n"); |
174 | 174 | ||
175 | if (test_bit(SK_DEAD, &svsk->sk_flags)) { | 175 | if (test_bit(SK_DEAD, &svsk->sk_flags)) { |
176 | /* Don't enqueue dead sockets */ | 176 | /* Don't enqueue dead sockets */ |
177 | dprintk("svc: socket %p is dead, not enqueued\n", svsk->sk_sk); | 177 | dprintk("svc: socket %p is dead, not enqueued\n", svsk->sk_sk); |
178 | goto out_unlock; | 178 | goto out_unlock; |
179 | } | 179 | } |
180 | 180 | ||
181 | /* Mark socket as busy. It will remain in this state until the | 181 | /* Mark socket as busy. It will remain in this state until the |
182 | * server has processed all pending data and put the socket back | 182 | * server has processed all pending data and put the socket back |
183 | * on the idle list. We update SK_BUSY atomically because | 183 | * on the idle list. We update SK_BUSY atomically because |
184 | * it also guards against trying to enqueue the svc_sock twice. | 184 | * it also guards against trying to enqueue the svc_sock twice. |
185 | */ | 185 | */ |
186 | if (test_and_set_bit(SK_BUSY, &svsk->sk_flags)) { | 186 | if (test_and_set_bit(SK_BUSY, &svsk->sk_flags)) { |
187 | /* Don't enqueue socket while already enqueued */ | 187 | /* Don't enqueue socket while already enqueued */ |
188 | dprintk("svc: socket %p busy, not enqueued\n", svsk->sk_sk); | 188 | dprintk("svc: socket %p busy, not enqueued\n", svsk->sk_sk); |
189 | goto out_unlock; | 189 | goto out_unlock; |
190 | } | 190 | } |
191 | BUG_ON(svsk->sk_pool != NULL); | 191 | BUG_ON(svsk->sk_pool != NULL); |
192 | svsk->sk_pool = pool; | 192 | svsk->sk_pool = pool; |
193 | 193 | ||
194 | set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); | 194 | set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); |
195 | if (((atomic_read(&svsk->sk_reserved) + serv->sv_bufsz)*2 | 195 | if (((atomic_read(&svsk->sk_reserved) + serv->sv_bufsz)*2 |
196 | > svc_sock_wspace(svsk)) | 196 | > svc_sock_wspace(svsk)) |
197 | && !test_bit(SK_CLOSE, &svsk->sk_flags) | 197 | && !test_bit(SK_CLOSE, &svsk->sk_flags) |
198 | && !test_bit(SK_CONN, &svsk->sk_flags)) { | 198 | && !test_bit(SK_CONN, &svsk->sk_flags)) { |
199 | /* Don't enqueue while not enough space for reply */ | 199 | /* Don't enqueue while not enough space for reply */ |
200 | dprintk("svc: socket %p no space, %d*2 > %ld, not enqueued\n", | 200 | dprintk("svc: socket %p no space, %d*2 > %ld, not enqueued\n", |
201 | svsk->sk_sk, atomic_read(&svsk->sk_reserved)+serv->sv_bufsz, | 201 | svsk->sk_sk, atomic_read(&svsk->sk_reserved)+serv->sv_bufsz, |
202 | svc_sock_wspace(svsk)); | 202 | svc_sock_wspace(svsk)); |
203 | svsk->sk_pool = NULL; | 203 | svsk->sk_pool = NULL; |
204 | clear_bit(SK_BUSY, &svsk->sk_flags); | 204 | clear_bit(SK_BUSY, &svsk->sk_flags); |
205 | goto out_unlock; | 205 | goto out_unlock; |
206 | } | 206 | } |
207 | clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); | 207 | clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); |
208 | 208 | ||
209 | 209 | ||
210 | if (!list_empty(&pool->sp_threads)) { | 210 | if (!list_empty(&pool->sp_threads)) { |
211 | rqstp = list_entry(pool->sp_threads.next, | 211 | rqstp = list_entry(pool->sp_threads.next, |
212 | struct svc_rqst, | 212 | struct svc_rqst, |
213 | rq_list); | 213 | rq_list); |
214 | dprintk("svc: socket %p served by daemon %p\n", | 214 | dprintk("svc: socket %p served by daemon %p\n", |
215 | svsk->sk_sk, rqstp); | 215 | svsk->sk_sk, rqstp); |
216 | svc_thread_dequeue(pool, rqstp); | 216 | svc_thread_dequeue(pool, rqstp); |
217 | if (rqstp->rq_sock) | 217 | if (rqstp->rq_sock) |
218 | printk(KERN_ERR | 218 | printk(KERN_ERR |
219 | "svc_sock_enqueue: server %p, rq_sock=%p!\n", | 219 | "svc_sock_enqueue: server %p, rq_sock=%p!\n", |
220 | rqstp, rqstp->rq_sock); | 220 | rqstp, rqstp->rq_sock); |
221 | rqstp->rq_sock = svsk; | 221 | rqstp->rq_sock = svsk; |
222 | atomic_inc(&svsk->sk_inuse); | 222 | atomic_inc(&svsk->sk_inuse); |
223 | rqstp->rq_reserved = serv->sv_bufsz; | 223 | rqstp->rq_reserved = serv->sv_bufsz; |
224 | atomic_add(rqstp->rq_reserved, &svsk->sk_reserved); | 224 | atomic_add(rqstp->rq_reserved, &svsk->sk_reserved); |
225 | BUG_ON(svsk->sk_pool != pool); | 225 | BUG_ON(svsk->sk_pool != pool); |
226 | wake_up(&rqstp->rq_wait); | 226 | wake_up(&rqstp->rq_wait); |
227 | } else { | 227 | } else { |
228 | dprintk("svc: socket %p put into queue\n", svsk->sk_sk); | 228 | dprintk("svc: socket %p put into queue\n", svsk->sk_sk); |
229 | list_add_tail(&svsk->sk_ready, &pool->sp_sockets); | 229 | list_add_tail(&svsk->sk_ready, &pool->sp_sockets); |
230 | BUG_ON(svsk->sk_pool != pool); | 230 | BUG_ON(svsk->sk_pool != pool); |
231 | } | 231 | } |
232 | 232 | ||
233 | out_unlock: | 233 | out_unlock: |
234 | spin_unlock_bh(&pool->sp_lock); | 234 | spin_unlock_bh(&pool->sp_lock); |
235 | } | 235 | } |
236 | 236 | ||
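The test_and_set_bit(SK_BUSY, ...) above is what keeps a socket from sitting on two queues at once: whichever caller flips the bit first owns the enqueue, and every other caller backs off until svc_sock_received clears it. The same guard in miniature, with an atomic flag standing in for the flag bit (names illustrative):

    #include <stdatomic.h>
    #include <stdbool.h>

    static atomic_flag busy = ATOMIC_FLAG_INIT;

    static bool try_enqueue(void)
    {
        if (atomic_flag_test_and_set(&busy))
            return false;                   /* already enqueued; lose the race */
        /* ... hand the socket to an idle thread, or list_add_tail ... */
        return true;
    }

    static void received(void)              /* after the thread has drained data */
    {
        atomic_flag_clear(&busy);           /* clear_bit(SK_BUSY, ...) */
        try_enqueue();                      /* re-check for more pending work */
    }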
237 | /* | 237 | /* |
238 | * Dequeue the first socket. Must be called with the pool->sp_lock held. | 238 | * Dequeue the first socket. Must be called with the pool->sp_lock held. |
239 | */ | 239 | */ |
240 | static inline struct svc_sock * | 240 | static inline struct svc_sock * |
241 | svc_sock_dequeue(struct svc_pool *pool) | 241 | svc_sock_dequeue(struct svc_pool *pool) |
242 | { | 242 | { |
243 | struct svc_sock *svsk; | 243 | struct svc_sock *svsk; |
244 | 244 | ||
245 | if (list_empty(&pool->sp_sockets)) | 245 | if (list_empty(&pool->sp_sockets)) |
246 | return NULL; | 246 | return NULL; |
247 | 247 | ||
248 | svsk = list_entry(pool->sp_sockets.next, | 248 | svsk = list_entry(pool->sp_sockets.next, |
249 | struct svc_sock, sk_ready); | 249 | struct svc_sock, sk_ready); |
250 | list_del_init(&svsk->sk_ready); | 250 | list_del_init(&svsk->sk_ready); |
251 | 251 | ||
252 | dprintk("svc: socket %p dequeued, inuse=%d\n", | 252 | dprintk("svc: socket %p dequeued, inuse=%d\n", |
253 | svsk->sk_sk, atomic_read(&svsk->sk_inuse)); | 253 | svsk->sk_sk, atomic_read(&svsk->sk_inuse)); |
254 | 254 | ||
255 | return svsk; | 255 | return svsk; |
256 | } | 256 | } |
257 | 257 | ||
258 | /* | 258 | /* |
259 | * Having read something from a socket, check whether it | 259 | * Having read something from a socket, check whether it |
260 | * needs to be re-enqueued. | 260 | * needs to be re-enqueued. |
261 | * Note: SK_DATA only gets cleared when a read-attempt finds | 261 | * Note: SK_DATA only gets cleared when a read-attempt finds |
262 | * no (or insufficient) data. | 262 | * no (or insufficient) data. |
263 | */ | 263 | */ |
264 | static inline void | 264 | static inline void |
265 | svc_sock_received(struct svc_sock *svsk) | 265 | svc_sock_received(struct svc_sock *svsk) |
266 | { | 266 | { |
267 | svsk->sk_pool = NULL; | 267 | svsk->sk_pool = NULL; |
268 | clear_bit(SK_BUSY, &svsk->sk_flags); | 268 | clear_bit(SK_BUSY, &svsk->sk_flags); |
269 | svc_sock_enqueue(svsk); | 269 | svc_sock_enqueue(svsk); |
270 | } | 270 | } |
271 | 271 | ||
272 | 272 | ||
273 | /** | 273 | /** |
274 | * svc_reserve - change the space reserved for the reply to a request. | 274 | * svc_reserve - change the space reserved for the reply to a request. |
275 | * @rqstp: The request in question | 275 | * @rqstp: The request in question |
276 | * @space: new max space to reserve | 276 | * @space: new max space to reserve |
277 | * | 277 | * |
278 | * Each request reserves some space on the output queue of the socket | 278 | * Each request reserves some space on the output queue of the socket |
279 | * to make sure the reply fits. This function reduces that reserved | 279 | * to make sure the reply fits. This function reduces that reserved |
280 | * space to be the amount of space used already, plus @space. | 280 | * space to be the amount of space used already, plus @space. |
281 | * | 281 | * |
282 | */ | 282 | */ |
283 | void svc_reserve(struct svc_rqst *rqstp, int space) | 283 | void svc_reserve(struct svc_rqst *rqstp, int space) |
284 | { | 284 | { |
285 | space += rqstp->rq_res.head[0].iov_len; | 285 | space += rqstp->rq_res.head[0].iov_len; |
286 | 286 | ||
287 | if (space < rqstp->rq_reserved) { | 287 | if (space < rqstp->rq_reserved) { |
288 | struct svc_sock *svsk = rqstp->rq_sock; | 288 | struct svc_sock *svsk = rqstp->rq_sock; |
289 | atomic_sub((rqstp->rq_reserved - space), &svsk->sk_reserved); | 289 | atomic_sub((rqstp->rq_reserved - space), &svsk->sk_reserved); |
290 | rqstp->rq_reserved = space; | 290 | rqstp->rq_reserved = space; |
291 | 291 | ||
292 | svc_sock_enqueue(svsk); | 292 | svc_sock_enqueue(svsk); |
293 | } | 293 | } |
294 | } | 294 | } |
295 | 295 | ||
296 | /* | 296 | /* |
297 | * Release a socket after use. | 297 | * Release a socket after use. |
298 | */ | 298 | */ |
299 | static inline void | 299 | static inline void |
300 | svc_sock_put(struct svc_sock *svsk) | 300 | svc_sock_put(struct svc_sock *svsk) |
301 | { | 301 | { |
302 | if (atomic_dec_and_test(&svsk->sk_inuse) && test_bit(SK_DEAD, &svsk->sk_flags)) { | 302 | if (atomic_dec_and_test(&svsk->sk_inuse) && test_bit(SK_DEAD, &svsk->sk_flags)) { |
303 | dprintk("svc: releasing dead socket\n"); | 303 | dprintk("svc: releasing dead socket\n"); |
304 | sock_release(svsk->sk_sock); | 304 | sock_release(svsk->sk_sock); |
305 | kfree(svsk); | 305 | kfree(svsk); |
306 | } | 306 | } |
307 | } | 307 | } |
308 | 308 | ||
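svc_sock_put above frees the socket only when the last reference is dropped and the socket has already been marked SK_DEAD by svc_delete_socket; either condition alone leaves it alive. The same pattern in a freestanding form (names illustrative, atomics in place of atomic_dec_and_test/test_bit):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdlib.h>

    struct msock { atomic_int inuse; atomic_bool dead; };

    static void msock_put(struct msock *s)
    {
        /* free only if we dropped the final reference of a dead socket */
        if (atomic_fetch_sub(&s->inuse, 1) == 1 && atomic_load(&s->dead))
            free(s);
    }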
309 | static void | 309 | static void |
310 | svc_sock_release(struct svc_rqst *rqstp) | 310 | svc_sock_release(struct svc_rqst *rqstp) |
311 | { | 311 | { |
312 | struct svc_sock *svsk = rqstp->rq_sock; | 312 | struct svc_sock *svsk = rqstp->rq_sock; |
313 | 313 | ||
314 | svc_release_skb(rqstp); | 314 | svc_release_skb(rqstp); |
315 | 315 | ||
316 | svc_free_res_pages(rqstp); | 316 | svc_free_res_pages(rqstp); |
317 | rqstp->rq_res.page_len = 0; | 317 | rqstp->rq_res.page_len = 0; |
318 | rqstp->rq_res.page_base = 0; | 318 | rqstp->rq_res.page_base = 0; |
319 | 319 | ||
320 | 320 | ||
321 | /* Reset response buffer and release | 321 | /* Reset response buffer and release |
322 | * the reservation. | 322 | * the reservation. |
323 | * But first, check that enough space was reserved | 323 | * But first, check that enough space was reserved |
324 | * for the reply, otherwise we have a bug! | 324 | * for the reply, otherwise we have a bug! |
325 | */ | 325 | */ |
326 | if ((rqstp->rq_res.len) > rqstp->rq_reserved) | 326 | if ((rqstp->rq_res.len) > rqstp->rq_reserved) |
327 | printk(KERN_ERR "RPC request reserved %d but used %d\n", | 327 | printk(KERN_ERR "RPC request reserved %d but used %d\n", |
328 | rqstp->rq_reserved, | 328 | rqstp->rq_reserved, |
329 | rqstp->rq_res.len); | 329 | rqstp->rq_res.len); |
330 | 330 | ||
331 | rqstp->rq_res.head[0].iov_len = 0; | 331 | rqstp->rq_res.head[0].iov_len = 0; |
332 | svc_reserve(rqstp, 0); | 332 | svc_reserve(rqstp, 0); |
333 | rqstp->rq_sock = NULL; | 333 | rqstp->rq_sock = NULL; |
334 | 334 | ||
335 | svc_sock_put(svsk); | 335 | svc_sock_put(svsk); |
336 | } | 336 | } |
337 | 337 | ||
338 | /* | 338 | /* |
339 | * External function to wake up a server waiting for data | 339 | * External function to wake up a server waiting for data |
340 | * This really only makes sense for services like lockd | 340 | * This really only makes sense for services like lockd |
341 | * which have exactly one thread anyway. | 341 | * which have exactly one thread anyway. |
342 | */ | 342 | */ |
343 | void | 343 | void |
344 | svc_wake_up(struct svc_serv *serv) | 344 | svc_wake_up(struct svc_serv *serv) |
345 | { | 345 | { |
346 | struct svc_rqst *rqstp; | 346 | struct svc_rqst *rqstp; |
347 | unsigned int i; | 347 | unsigned int i; |
348 | struct svc_pool *pool; | 348 | struct svc_pool *pool; |
349 | 349 | ||
350 | for (i = 0; i < serv->sv_nrpools; i++) { | 350 | for (i = 0; i < serv->sv_nrpools; i++) { |
351 | pool = &serv->sv_pools[i]; | 351 | pool = &serv->sv_pools[i]; |
352 | 352 | ||
353 | spin_lock_bh(&pool->sp_lock); | 353 | spin_lock_bh(&pool->sp_lock); |
354 | if (!list_empty(&pool->sp_threads)) { | 354 | if (!list_empty(&pool->sp_threads)) { |
355 | rqstp = list_entry(pool->sp_threads.next, | 355 | rqstp = list_entry(pool->sp_threads.next, |
356 | struct svc_rqst, | 356 | struct svc_rqst, |
357 | rq_list); | 357 | rq_list); |
358 | dprintk("svc: daemon %p woken up.\n", rqstp); | 358 | dprintk("svc: daemon %p woken up.\n", rqstp); |
359 | /* | 359 | /* |
360 | svc_thread_dequeue(pool, rqstp); | 360 | svc_thread_dequeue(pool, rqstp); |
361 | rqstp->rq_sock = NULL; | 361 | rqstp->rq_sock = NULL; |
362 | */ | 362 | */ |
363 | wake_up(&rqstp->rq_wait); | 363 | wake_up(&rqstp->rq_wait); |
364 | } | 364 | } |
365 | spin_unlock_bh(&pool->sp_lock); | 365 | spin_unlock_bh(&pool->sp_lock); |
366 | } | 366 | } |
367 | } | 367 | } |
368 | 368 | ||
369 | /* | 369 | /* |
370 | * Generic sendto routine | 370 | * Generic sendto routine |
371 | */ | 371 | */ |
372 | static int | 372 | static int |
373 | svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) | 373 | svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) |
374 | { | 374 | { |
375 | struct svc_sock *svsk = rqstp->rq_sock; | 375 | struct svc_sock *svsk = rqstp->rq_sock; |
376 | struct socket *sock = svsk->sk_sock; | 376 | struct socket *sock = svsk->sk_sock; |
377 | int slen; | 377 | int slen; |
378 | char buffer[CMSG_SPACE(sizeof(struct in_pktinfo))]; | 378 | char buffer[CMSG_SPACE(sizeof(struct in_pktinfo))]; |
379 | struct cmsghdr *cmh = (struct cmsghdr *)buffer; | 379 | struct cmsghdr *cmh = (struct cmsghdr *)buffer; |
380 | struct in_pktinfo *pki = (struct in_pktinfo *)CMSG_DATA(cmh); | 380 | struct in_pktinfo *pki = (struct in_pktinfo *)CMSG_DATA(cmh); |
381 | int len = 0; | 381 | int len = 0; |
382 | int result; | 382 | int result; |
383 | int size; | 383 | int size; |
384 | struct page **ppage = xdr->pages; | 384 | struct page **ppage = xdr->pages; |
385 | size_t base = xdr->page_base; | 385 | size_t base = xdr->page_base; |
386 | unsigned int pglen = xdr->page_len; | 386 | unsigned int pglen = xdr->page_len; |
387 | unsigned int flags = MSG_MORE; | 387 | unsigned int flags = MSG_MORE; |
388 | 388 | ||
389 | slen = xdr->len; | 389 | slen = xdr->len; |
390 | 390 | ||
391 | if (rqstp->rq_prot == IPPROTO_UDP) { | 391 | if (rqstp->rq_prot == IPPROTO_UDP) { |
392 | /* set the source and destination */ | 392 | /* set the source and destination */ |
393 | struct msghdr msg; | 393 | struct msghdr msg; |
394 | msg.msg_name = &rqstp->rq_addr; | 394 | msg.msg_name = &rqstp->rq_addr; |
395 | msg.msg_namelen = sizeof(rqstp->rq_addr); | 395 | msg.msg_namelen = sizeof(rqstp->rq_addr); |
396 | msg.msg_iov = NULL; | 396 | msg.msg_iov = NULL; |
397 | msg.msg_iovlen = 0; | 397 | msg.msg_iovlen = 0; |
398 | msg.msg_flags = MSG_MORE; | 398 | msg.msg_flags = MSG_MORE; |
399 | 399 | ||
400 | msg.msg_control = cmh; | 400 | msg.msg_control = cmh; |
401 | msg.msg_controllen = sizeof(buffer); | 401 | msg.msg_controllen = sizeof(buffer); |
402 | cmh->cmsg_len = CMSG_LEN(sizeof(*pki)); | 402 | cmh->cmsg_len = CMSG_LEN(sizeof(*pki)); |
403 | cmh->cmsg_level = SOL_IP; | 403 | cmh->cmsg_level = SOL_IP; |
404 | cmh->cmsg_type = IP_PKTINFO; | 404 | cmh->cmsg_type = IP_PKTINFO; |
405 | pki->ipi_ifindex = 0; | 405 | pki->ipi_ifindex = 0; |
406 | pki->ipi_spec_dst.s_addr = rqstp->rq_daddr; | 406 | pki->ipi_spec_dst.s_addr = rqstp->rq_daddr; |
407 | 407 | ||
408 | if (sock_sendmsg(sock, &msg, 0) < 0) | 408 | if (sock_sendmsg(sock, &msg, 0) < 0) |
409 | goto out; | 409 | goto out; |
410 | } | 410 | } |
411 | 411 | ||
412 | /* send head */ | 412 | /* send head */ |
413 | if (slen == xdr->head[0].iov_len) | 413 | if (slen == xdr->head[0].iov_len) |
414 | flags = 0; | 414 | flags = 0; |
415 | len = kernel_sendpage(sock, rqstp->rq_respages[0], 0, | 415 | len = kernel_sendpage(sock, rqstp->rq_respages[0], 0, |
416 | xdr->head[0].iov_len, flags); | 416 | xdr->head[0].iov_len, flags); |
417 | if (len != xdr->head[0].iov_len) | 417 | if (len != xdr->head[0].iov_len) |
418 | goto out; | 418 | goto out; |
419 | slen -= xdr->head[0].iov_len; | 419 | slen -= xdr->head[0].iov_len; |
420 | if (slen == 0) | 420 | if (slen == 0) |
421 | goto out; | 421 | goto out; |
422 | 422 | ||
423 | /* send page data */ | 423 | /* send page data */ |
424 | size = PAGE_SIZE - base < pglen ? PAGE_SIZE - base : pglen; | 424 | size = PAGE_SIZE - base < pglen ? PAGE_SIZE - base : pglen; |
425 | while (pglen > 0) { | 425 | while (pglen > 0) { |
426 | if (slen == size) | 426 | if (slen == size) |
427 | flags = 0; | 427 | flags = 0; |
428 | result = kernel_sendpage(sock, *ppage, base, size, flags); | 428 | result = kernel_sendpage(sock, *ppage, base, size, flags); |
429 | if (result > 0) | 429 | if (result > 0) |
430 | len += result; | 430 | len += result; |
431 | if (result != size) | 431 | if (result != size) |
432 | goto out; | 432 | goto out; |
433 | slen -= size; | 433 | slen -= size; |
434 | pglen -= size; | 434 | pglen -= size; |
435 | size = PAGE_SIZE < pglen ? PAGE_SIZE : pglen; | 435 | size = PAGE_SIZE < pglen ? PAGE_SIZE : pglen; |
436 | base = 0; | 436 | base = 0; |
437 | ppage++; | 437 | ppage++; |
438 | } | 438 | } |
439 | /* send tail */ | 439 | /* send tail */ |
440 | if (xdr->tail[0].iov_len) { | 440 | if (xdr->tail[0].iov_len) { |
441 | result = kernel_sendpage(sock, rqstp->rq_respages[0], | 441 | result = kernel_sendpage(sock, rqstp->rq_respages[0], |
442 | ((unsigned long)xdr->tail[0].iov_base) | 442 | ((unsigned long)xdr->tail[0].iov_base) |
443 | & (PAGE_SIZE-1), | 443 | & (PAGE_SIZE-1), |
444 | xdr->tail[0].iov_len, 0); | 444 | xdr->tail[0].iov_len, 0); |
445 | 445 | ||
446 | if (result > 0) | 446 | if (result > 0) |
447 | len += result; | 447 | len += result; |
448 | } | 448 | } |
449 | out: | 449 | out: |
450 | dprintk("svc: socket %p sendto([%p %Zu... ], %d) = %d (addr %x)\n", | 450 | dprintk("svc: socket %p sendto([%p %Zu... ], %d) = %d (addr %x)\n", |
451 | rqstp->rq_sock, xdr->head[0].iov_base, xdr->head[0].iov_len, xdr->len, len, | 451 | rqstp->rq_sock, xdr->head[0].iov_base, xdr->head[0].iov_len, xdr->len, len, |
452 | rqstp->rq_addr.sin_addr.s_addr); | 452 | rqstp->rq_addr.sin_addr.s_addr); |
453 | 453 | ||
454 | return len; | 454 | return len; |
455 | } | 455 | } |
456 | 456 | ||
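The middle of svc_sendto steps through the xdr_buf's page list: the first chunk is shortened by page_base, every later chunk starts at offset 0, and MSG_MORE stays set until the final fragment goes out. The chunking arithmetic on its own, as a small sketch:

    #include <stdio.h>

    #define PAGE_SIZE 4096u

    /* Print the (page, offset, size) chunks kernel_sendpage would see for
     * page data starting 'base' bytes into the first page, 'pglen' bytes long. */
    static void walk_pages(unsigned int base, unsigned int pglen)
    {
        unsigned int page = 0;
        unsigned int size = PAGE_SIZE - base < pglen ? PAGE_SIZE - base : pglen;

        while (pglen > 0) {
            printf("page %u: offset %u, %u bytes\n", page, base, size);
            pglen -= size;
            base = 0;                       /* later pages start at offset 0 */
            page++;
            size = PAGE_SIZE < pglen ? PAGE_SIZE : pglen;
        }
    }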
457 | /* | 457 | /* |
458 | * Report socket names for nfsdfs | 458 | * Report socket names for nfsdfs |
459 | */ | 459 | */ |
460 | static int one_sock_name(char *buf, struct svc_sock *svsk) | 460 | static int one_sock_name(char *buf, struct svc_sock *svsk) |
461 | { | 461 | { |
462 | int len; | 462 | int len; |
463 | 463 | ||
464 | switch(svsk->sk_sk->sk_family) { | 464 | switch(svsk->sk_sk->sk_family) { |
465 | case AF_INET: | 465 | case AF_INET: |
466 | len = sprintf(buf, "ipv4 %s %u.%u.%u.%u %d\n", | 466 | len = sprintf(buf, "ipv4 %s %u.%u.%u.%u %d\n", |
467 | svsk->sk_sk->sk_protocol==IPPROTO_UDP? | 467 | svsk->sk_sk->sk_protocol==IPPROTO_UDP? |
468 | "udp" : "tcp", | 468 | "udp" : "tcp", |
469 | NIPQUAD(inet_sk(svsk->sk_sk)->rcv_saddr), | 469 | NIPQUAD(inet_sk(svsk->sk_sk)->rcv_saddr), |
470 | inet_sk(svsk->sk_sk)->num); | 470 | inet_sk(svsk->sk_sk)->num); |
471 | break; | 471 | break; |
472 | default: | 472 | default: |
473 | len = sprintf(buf, "*unknown-%d*\n", | 473 | len = sprintf(buf, "*unknown-%d*\n", |
474 | svsk->sk_sk->sk_family); | 474 | svsk->sk_sk->sk_family); |
475 | } | 475 | } |
476 | return len; | 476 | return len; |
477 | } | 477 | } |
478 | 478 | ||
479 | int | 479 | int |
480 | svc_sock_names(char *buf, struct svc_serv *serv, char *toclose) | 480 | svc_sock_names(char *buf, struct svc_serv *serv, char *toclose) |
481 | { | 481 | { |
482 | struct svc_sock *svsk, *closesk = NULL; | 482 | struct svc_sock *svsk, *closesk = NULL; |
483 | int len = 0; | 483 | int len = 0; |
484 | 484 | ||
485 | if (!serv) | 485 | if (!serv) |
486 | return 0; | 486 | return 0; |
487 | spin_lock(&serv->sv_lock); | 487 | spin_lock(&serv->sv_lock); |
488 | list_for_each_entry(svsk, &serv->sv_permsocks, sk_list) { | 488 | list_for_each_entry(svsk, &serv->sv_permsocks, sk_list) { |
489 | int onelen = one_sock_name(buf+len, svsk); | 489 | int onelen = one_sock_name(buf+len, svsk); |
490 | if (toclose && strcmp(toclose, buf+len) == 0) | 490 | if (toclose && strcmp(toclose, buf+len) == 0) |
491 | closesk = svsk; | 491 | closesk = svsk; |
492 | else | 492 | else |
493 | len += onelen; | 493 | len += onelen; |
494 | } | 494 | } |
495 | spin_unlock(&serv->sv_lock); | 495 | spin_unlock(&serv->sv_lock); |
496 | if (closesk) | 496 | if (closesk) |
497 | /* Should unregister with portmap, but you cannot | 497 | /* Should unregister with portmap, but you cannot |
498 | * unregister just one protocol... | 498 | * unregister just one protocol... |
499 | */ | 499 | */ |
500 | svc_delete_socket(closesk); | 500 | svc_delete_socket(closesk); |
501 | else if (toclose) | 501 | else if (toclose) |
502 | return -ENOENT; | 502 | return -ENOENT; |
503 | return len; | 503 | return len; |
504 | } | 504 | } |
505 | EXPORT_SYMBOL(svc_sock_names); | 505 | EXPORT_SYMBOL(svc_sock_names); |
506 | 506 | ||
507 | /* | 507 | /* |
508 | * Check input queue length | 508 | * Check input queue length |
509 | */ | 509 | */ |
510 | static int | 510 | static int |
511 | svc_recv_available(struct svc_sock *svsk) | 511 | svc_recv_available(struct svc_sock *svsk) |
512 | { | 512 | { |
513 | struct socket *sock = svsk->sk_sock; | 513 | struct socket *sock = svsk->sk_sock; |
514 | int avail, err; | 514 | int avail, err; |
515 | 515 | ||
516 | err = kernel_sock_ioctl(sock, TIOCINQ, (unsigned long) &avail); | 516 | err = kernel_sock_ioctl(sock, TIOCINQ, (unsigned long) &avail); |
517 | 517 | ||
518 | return (err >= 0)? avail : err; | 518 | return (err >= 0)? avail : err; |
519 | } | 519 | } |
520 | 520 | ||
521 | /* | 521 | /* |
522 | * Generic recvfrom routine. | 522 | * Generic recvfrom routine. |
523 | */ | 523 | */ |
524 | static int | 524 | static int |
525 | svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, int buflen) | 525 | svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, int buflen) |
526 | { | 526 | { |
527 | struct msghdr msg; | 527 | struct msghdr msg; |
528 | struct socket *sock; | 528 | struct socket *sock; |
529 | int len, alen; | 529 | int len, alen; |
530 | 530 | ||
531 | rqstp->rq_addrlen = sizeof(rqstp->rq_addr); | 531 | rqstp->rq_addrlen = sizeof(rqstp->rq_addr); |
532 | sock = rqstp->rq_sock->sk_sock; | 532 | sock = rqstp->rq_sock->sk_sock; |
533 | 533 | ||
534 | msg.msg_name = &rqstp->rq_addr; | 534 | msg.msg_name = &rqstp->rq_addr; |
535 | msg.msg_namelen = sizeof(rqstp->rq_addr); | 535 | msg.msg_namelen = sizeof(rqstp->rq_addr); |
536 | msg.msg_control = NULL; | 536 | msg.msg_control = NULL; |
537 | msg.msg_controllen = 0; | 537 | msg.msg_controllen = 0; |
538 | 538 | ||
539 | msg.msg_flags = MSG_DONTWAIT; | 539 | msg.msg_flags = MSG_DONTWAIT; |
540 | 540 | ||
541 | len = kernel_recvmsg(sock, &msg, iov, nr, buflen, MSG_DONTWAIT); | 541 | len = kernel_recvmsg(sock, &msg, iov, nr, buflen, MSG_DONTWAIT); |
542 | 542 | ||
543 | /* sock_recvmsg doesn't fill in the name/namelen, so we must... | 543 | /* sock_recvmsg doesn't fill in the name/namelen, so we must... |
544 | * possibly we should cache this in the svc_sock structure | 544 | * possibly we should cache this in the svc_sock structure |
545 | * at accept time. FIXME | 545 | * at accept time. FIXME |
546 | */ | 546 | */ |
547 | alen = sizeof(rqstp->rq_addr); | 547 | alen = sizeof(rqstp->rq_addr); |
548 | kernel_getpeername(sock, (struct sockaddr *)&rqstp->rq_addr, &alen); | 548 | kernel_getpeername(sock, (struct sockaddr *)&rqstp->rq_addr, &alen); |
549 | 549 | ||
550 | dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n", | 550 | dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n", |
551 | rqstp->rq_sock, iov[0].iov_base, iov[0].iov_len, len); | 551 | rqstp->rq_sock, iov[0].iov_base, iov[0].iov_len, len); |
552 | 552 | ||
553 | return len; | 553 | return len; |
554 | } | 554 | } |
555 | 555 | ||
556 | /* | 556 | /* |
557 | * Set socket snd and rcv buffer lengths | 557 | * Set socket snd and rcv buffer lengths |
558 | */ | 558 | */ |
559 | static inline void | 559 | static inline void |
560 | svc_sock_setbufsize(struct socket *sock, unsigned int snd, unsigned int rcv) | 560 | svc_sock_setbufsize(struct socket *sock, unsigned int snd, unsigned int rcv) |
561 | { | 561 | { |
562 | #if 0 | 562 | #if 0 |
563 | mm_segment_t oldfs; | 563 | mm_segment_t oldfs; |
564 | oldfs = get_fs(); set_fs(KERNEL_DS); | 564 | oldfs = get_fs(); set_fs(KERNEL_DS); |
565 | sock_setsockopt(sock, SOL_SOCKET, SO_SNDBUF, | 565 | sock_setsockopt(sock, SOL_SOCKET, SO_SNDBUF, |
566 | (char*)&snd, sizeof(snd)); | 566 | (char*)&snd, sizeof(snd)); |
567 | sock_setsockopt(sock, SOL_SOCKET, SO_RCVBUF, | 567 | sock_setsockopt(sock, SOL_SOCKET, SO_RCVBUF, |
568 | (char*)&rcv, sizeof(rcv)); | 568 | (char*)&rcv, sizeof(rcv)); |
569 | #else | 569 | #else |
570 | /* sock_setsockopt limits use to sysctl_?mem_max, | 570 | /* sock_setsockopt limits use to sysctl_?mem_max, |
571 | * which isn't acceptable. Until that is made conditional | 571 | * which isn't acceptable. Until that is made conditional |
572 | * on not having CAP_SYS_RESOURCE or similar, we go direct... | 572 | * on not having CAP_SYS_RESOURCE or similar, we go direct... |
573 | * DaveM said I could! | 573 | * DaveM said I could! |
574 | */ | 574 | */ |
575 | lock_sock(sock->sk); | 575 | lock_sock(sock->sk); |
576 | sock->sk->sk_sndbuf = snd * 2; | 576 | sock->sk->sk_sndbuf = snd * 2; |
577 | sock->sk->sk_rcvbuf = rcv * 2; | 577 | sock->sk->sk_rcvbuf = rcv * 2; |
578 | sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK; | 578 | sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK; |
579 | release_sock(sock->sk); | 579 | release_sock(sock->sk); |
580 | #endif | 580 | #endif |
581 | } | 581 | } |
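The live branch above pokes sk_sndbuf/sk_rcvbuf directly because, as its comment notes, sock_setsockopt would clamp the values to the wmem_max/rmem_max sysctls. The disabled #if 0 branch corresponds, roughly, to the ordinary userspace call, which is subject to those caps:

    #include <sys/socket.h>

    /* Userspace counterpart of the #if 0 branch: limited by
     * net.core.wmem_max / net.core.rmem_max, which is exactly what the
     * kernel path is avoiding. (The kernel also doubles the values,
     * mirroring what SO_SNDBUF/SO_RCVBUF do internally.) */
    static void set_bufs(int fd, int snd, int rcv)
    {
        setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &snd, sizeof(snd));
        setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcv, sizeof(rcv));
    }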
582 | /* | 582 | /* |
583 | * INET callback when data has been received on the socket. | 583 | * INET callback when data has been received on the socket. |
584 | */ | 584 | */ |
585 | static void | 585 | static void |
586 | svc_udp_data_ready(struct sock *sk, int count) | 586 | svc_udp_data_ready(struct sock *sk, int count) |
587 | { | 587 | { |
588 | struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; | 588 | struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; |
589 | 589 | ||
590 | if (svsk) { | 590 | if (svsk) { |
591 | dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n", | 591 | dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n", |
592 | svsk, sk, count, test_bit(SK_BUSY, &svsk->sk_flags)); | 592 | svsk, sk, count, test_bit(SK_BUSY, &svsk->sk_flags)); |
593 | set_bit(SK_DATA, &svsk->sk_flags); | 593 | set_bit(SK_DATA, &svsk->sk_flags); |
594 | svc_sock_enqueue(svsk); | 594 | svc_sock_enqueue(svsk); |
595 | } | 595 | } |
596 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) | 596 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) |
597 | wake_up_interruptible(sk->sk_sleep); | 597 | wake_up_interruptible(sk->sk_sleep); |
598 | } | 598 | } |
599 | 599 | ||
600 | /* | 600 | /* |
601 | * INET callback when space is newly available on the socket. | 601 | * INET callback when space is newly available on the socket. |
602 | */ | 602 | */ |
603 | static void | 603 | static void |
604 | svc_write_space(struct sock *sk) | 604 | svc_write_space(struct sock *sk) |
605 | { | 605 | { |
606 | struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data); | 606 | struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data); |
607 | 607 | ||
608 | if (svsk) { | 608 | if (svsk) { |
609 | dprintk("svc: socket %p(inet %p), write_space busy=%d\n", | 609 | dprintk("svc: socket %p(inet %p), write_space busy=%d\n", |
610 | svsk, sk, test_bit(SK_BUSY, &svsk->sk_flags)); | 610 | svsk, sk, test_bit(SK_BUSY, &svsk->sk_flags)); |
611 | svc_sock_enqueue(svsk); | 611 | svc_sock_enqueue(svsk); |
612 | } | 612 | } |
613 | 613 | ||
614 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) { | 614 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) { |
615 | dprintk("RPC svc_write_space: someone sleeping on %p\n", | 615 | dprintk("RPC svc_write_space: someone sleeping on %p\n", |
616 | svsk); | 616 | svsk); |
617 | wake_up_interruptible(sk->sk_sleep); | 617 | wake_up_interruptible(sk->sk_sleep); |
618 | } | 618 | } |
619 | } | 619 | } |
620 | 620 | ||
621 | /* | 621 | /* |
622 | * Receive a datagram from a UDP socket. | 622 | * Receive a datagram from a UDP socket. |
623 | */ | 623 | */ |
624 | static int | 624 | static int |
625 | svc_udp_recvfrom(struct svc_rqst *rqstp) | 625 | svc_udp_recvfrom(struct svc_rqst *rqstp) |
626 | { | 626 | { |
627 | struct svc_sock *svsk = rqstp->rq_sock; | 627 | struct svc_sock *svsk = rqstp->rq_sock; |
628 | struct svc_serv *serv = svsk->sk_server; | 628 | struct svc_serv *serv = svsk->sk_server; |
629 | struct sk_buff *skb; | 629 | struct sk_buff *skb; |
630 | int err, len; | 630 | int err, len; |
631 | 631 | ||
632 | if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags)) | 632 | if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags)) |
633 | /* udp sockets need large rcvbuf as all pending | 633 | /* udp sockets need large rcvbuf as all pending |
634 | * requests are still in that buffer. sndbuf must | 634 | * requests are still in that buffer. sndbuf must |
635 | * also be large enough that there is enough space | 635 | * also be large enough that there is enough space |
636 | * for one reply per thread. We count all threads | 636 | * for one reply per thread. We count all threads |
637 | * rather than threads in a particular pool, which | 637 | * rather than threads in a particular pool, which |
638 | * provides an upper bound on the number of threads | 638 | * provides an upper bound on the number of threads |
639 | * which will access the socket. | 639 | * which will access the socket. |
640 | */ | 640 | */ |
641 | svc_sock_setbufsize(svsk->sk_sock, | 641 | svc_sock_setbufsize(svsk->sk_sock, |
642 | (serv->sv_nrthreads+3) * serv->sv_bufsz, | 642 | (serv->sv_nrthreads+3) * serv->sv_bufsz, |
643 | (serv->sv_nrthreads+3) * serv->sv_bufsz); | 643 | (serv->sv_nrthreads+3) * serv->sv_bufsz); |
644 | 644 | ||
645 | if ((rqstp->rq_deferred = svc_deferred_dequeue(svsk))) { | 645 | if ((rqstp->rq_deferred = svc_deferred_dequeue(svsk))) { |
646 | svc_sock_received(svsk); | 646 | svc_sock_received(svsk); |
647 | return svc_deferred_recv(rqstp); | 647 | return svc_deferred_recv(rqstp); |
648 | } | 648 | } |
649 | 649 | ||
650 | clear_bit(SK_DATA, &svsk->sk_flags); | 650 | clear_bit(SK_DATA, &svsk->sk_flags); |
651 | while ((skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err)) == NULL) { | 651 | while ((skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err)) == NULL) { |
652 | if (err == -EAGAIN) { | 652 | if (err == -EAGAIN) { |
653 | svc_sock_received(svsk); | 653 | svc_sock_received(svsk); |
654 | return err; | 654 | return err; |
655 | } | 655 | } |
656 | /* possibly an icmp error */ | 656 | /* possibly an icmp error */ |
657 | dprintk("svc: recvfrom returned error %d\n", -err); | 657 | dprintk("svc: recvfrom returned error %d\n", -err); |
658 | } | 658 | } |
659 | if (skb->tstamp.off_sec == 0) { | 659 | if (skb->tstamp.off_sec == 0) { |
660 | struct timeval tv; | 660 | struct timeval tv; |
661 | 661 | ||
662 | tv.tv_sec = xtime.tv_sec; | 662 | tv.tv_sec = xtime.tv_sec; |
663 | tv.tv_usec = xtime.tv_nsec / NSEC_PER_USEC; | 663 | tv.tv_usec = xtime.tv_nsec / NSEC_PER_USEC; |
664 | skb_set_timestamp(skb, &tv); | 664 | skb_set_timestamp(skb, &tv); |
665 | /* Don't enable netstamp, sunrpc doesn't | 665 | /* Don't enable netstamp, sunrpc doesn't |
666 | need that much accuracy */ | 666 | need that much accuracy */ |
667 | } | 667 | } |
668 | skb_get_timestamp(skb, &svsk->sk_sk->sk_stamp); | 668 | skb_get_timestamp(skb, &svsk->sk_sk->sk_stamp); |
669 | set_bit(SK_DATA, &svsk->sk_flags); /* there may be more data... */ | 669 | set_bit(SK_DATA, &svsk->sk_flags); /* there may be more data... */ |
670 | 670 | ||
671 | /* | 671 | /* |
672 | * Maybe more packets - kick another thread ASAP. | 672 | * Maybe more packets - kick another thread ASAP. |
673 | */ | 673 | */ |
674 | svc_sock_received(svsk); | 674 | svc_sock_received(svsk); |
675 | 675 | ||
676 | len = skb->len - sizeof(struct udphdr); | 676 | len = skb->len - sizeof(struct udphdr); |
677 | rqstp->rq_arg.len = len; | 677 | rqstp->rq_arg.len = len; |
678 | 678 | ||
679 | rqstp->rq_prot = IPPROTO_UDP; | 679 | rqstp->rq_prot = IPPROTO_UDP; |
680 | 680 | ||
681 | /* Get sender address */ | 681 | /* Get sender address */ |
682 | rqstp->rq_addr.sin_family = AF_INET; | 682 | rqstp->rq_addr.sin_family = AF_INET; |
683 | rqstp->rq_addr.sin_port = skb->h.uh->source; | 683 | rqstp->rq_addr.sin_port = skb->h.uh->source; |
684 | rqstp->rq_addr.sin_addr.s_addr = skb->nh.iph->saddr; | 684 | rqstp->rq_addr.sin_addr.s_addr = skb->nh.iph->saddr; |
685 | rqstp->rq_daddr = skb->nh.iph->daddr; | 685 | rqstp->rq_daddr = skb->nh.iph->daddr; |
686 | 686 | ||
687 | if (skb_is_nonlinear(skb)) { | 687 | if (skb_is_nonlinear(skb)) { |
688 | /* we have to copy */ | 688 | /* we have to copy */ |
689 | local_bh_disable(); | 689 | local_bh_disable(); |
690 | if (csum_partial_copy_to_xdr(&rqstp->rq_arg, skb)) { | 690 | if (csum_partial_copy_to_xdr(&rqstp->rq_arg, skb)) { |
691 | local_bh_enable(); | 691 | local_bh_enable(); |
692 | /* checksum error */ | 692 | /* checksum error */ |
693 | skb_free_datagram(svsk->sk_sk, skb); | 693 | skb_free_datagram(svsk->sk_sk, skb); |
694 | return 0; | 694 | return 0; |
695 | } | 695 | } |
696 | local_bh_enable(); | 696 | local_bh_enable(); |
697 | skb_free_datagram(svsk->sk_sk, skb); | 697 | skb_free_datagram(svsk->sk_sk, skb); |
698 | } else { | 698 | } else { |
699 | /* we can use it in-place */ | 699 | /* we can use it in-place */ |
700 | rqstp->rq_arg.head[0].iov_base = skb->data + sizeof(struct udphdr); | 700 | rqstp->rq_arg.head[0].iov_base = skb->data + sizeof(struct udphdr); |
701 | rqstp->rq_arg.head[0].iov_len = len; | 701 | rqstp->rq_arg.head[0].iov_len = len; |
702 | if (skb_checksum_complete(skb)) { | 702 | if (skb_checksum_complete(skb)) { |
703 | skb_free_datagram(svsk->sk_sk, skb); | 703 | skb_free_datagram(svsk->sk_sk, skb); |
704 | return 0; | 704 | return 0; |
705 | } | 705 | } |
706 | rqstp->rq_skbuff = skb; | 706 | rqstp->rq_skbuff = skb; |
707 | } | 707 | } |
708 | 708 | ||
709 | rqstp->rq_arg.page_base = 0; | 709 | rqstp->rq_arg.page_base = 0; |
710 | if (len <= rqstp->rq_arg.head[0].iov_len) { | 710 | if (len <= rqstp->rq_arg.head[0].iov_len) { |
711 | rqstp->rq_arg.head[0].iov_len = len; | 711 | rqstp->rq_arg.head[0].iov_len = len; |
712 | rqstp->rq_arg.page_len = 0; | 712 | rqstp->rq_arg.page_len = 0; |
713 | rqstp->rq_respages = rqstp->rq_pages+1; | 713 | rqstp->rq_respages = rqstp->rq_pages+1; |
714 | } else { | 714 | } else { |
715 | rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len; | 715 | rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len; |
716 | rqstp->rq_respages = rqstp->rq_pages + 1 + | 716 | rqstp->rq_respages = rqstp->rq_pages + 1 + |
717 | (rqstp->rq_arg.page_len + PAGE_SIZE - 1)/ PAGE_SIZE; | 717 | (rqstp->rq_arg.page_len + PAGE_SIZE - 1)/ PAGE_SIZE; |
718 | } | 718 | } |
719 | 719 | ||
720 | if (serv->sv_stats) | 720 | if (serv->sv_stats) |
721 | serv->sv_stats->netudpcnt++; | 721 | serv->sv_stats->netudpcnt++; |
722 | 722 | ||
723 | return len; | 723 | return len; |
724 | } | 724 | } |
725 | 725 | ||
726 | static int | 726 | static int |
727 | svc_udp_sendto(struct svc_rqst *rqstp) | 727 | svc_udp_sendto(struct svc_rqst *rqstp) |
728 | { | 728 | { |
729 | int error; | 729 | int error; |
730 | 730 | ||
731 | error = svc_sendto(rqstp, &rqstp->rq_res); | 731 | error = svc_sendto(rqstp, &rqstp->rq_res); |
732 | if (error == -ECONNREFUSED) | 732 | if (error == -ECONNREFUSED) |
733 | /* ICMP error on earlier request. */ | 733 | /* ICMP error on earlier request. */ |
734 | error = svc_sendto(rqstp, &rqstp->rq_res); | 734 | error = svc_sendto(rqstp, &rqstp->rq_res); |
735 | 735 | ||
736 | return error; | 736 | return error; |
737 | } | 737 | } |
738 | 738 | ||
739 | static void | 739 | static void |
740 | svc_udp_init(struct svc_sock *svsk) | 740 | svc_udp_init(struct svc_sock *svsk) |
741 | { | 741 | { |
742 | svsk->sk_sk->sk_data_ready = svc_udp_data_ready; | 742 | svsk->sk_sk->sk_data_ready = svc_udp_data_ready; |
743 | svsk->sk_sk->sk_write_space = svc_write_space; | 743 | svsk->sk_sk->sk_write_space = svc_write_space; |
744 | svsk->sk_recvfrom = svc_udp_recvfrom; | 744 | svsk->sk_recvfrom = svc_udp_recvfrom; |
745 | svsk->sk_sendto = svc_udp_sendto; | 745 | svsk->sk_sendto = svc_udp_sendto; |
746 | 746 | ||
747 | /* initial setting: must have enough space to | 747 | /* initial setting: must have enough space to |
748 | * receive and respond to one request. | 748 | * receive and respond to one request. |
749 | * svc_udp_recvfrom will re-adjust if necessary | 749 | * svc_udp_recvfrom will re-adjust if necessary |
750 | */ | 750 | */ |
751 | svc_sock_setbufsize(svsk->sk_sock, | 751 | svc_sock_setbufsize(svsk->sk_sock, |
752 | 3 * svsk->sk_server->sv_bufsz, | 752 | 3 * svsk->sk_server->sv_bufsz, |
753 | 3 * svsk->sk_server->sv_bufsz); | 753 | 3 * svsk->sk_server->sv_bufsz); |
754 | 754 | ||
755 | set_bit(SK_DATA, &svsk->sk_flags); /* might have come in before data_ready set up */ | 755 | set_bit(SK_DATA, &svsk->sk_flags); /* might have come in before data_ready set up */ |
756 | set_bit(SK_CHNGBUF, &svsk->sk_flags); | 756 | set_bit(SK_CHNGBUF, &svsk->sk_flags); |
757 | } | 757 | } |
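
The sizing rule above appears twice: svc_udp_init() starts each socket with room for 3 requests of sv_bufsz bytes, and svc_udp_recvfrom() later grows that to one request per thread plus 3 spare once the thread count is known. A minimal userspace sketch of the same arithmetic, using setsockopt() with SO_SNDBUF/SO_RCVBUF as a stand-in for the kernel-internal svc_sock_setbufsize() (the "+3" headroom comes from the code above; the userspace mapping is an assumption):

#include <sys/socket.h>

/* Size both socket buffers to (nr_threads + 3) requests of bufsz bytes
 * each, mirroring the (sv_nrthreads+3) * sv_bufsz rule above.  Returns
 * 0 on success, -1 with errno set on failure. */
static int sketch_setbufsize(int fd, unsigned int nr_threads,
			     unsigned int bufsz)
{
	int bytes = (nr_threads + 3) * bufsz;

	if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &bytes, sizeof(bytes)) < 0)
		return -1;
	return setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &bytes, sizeof(bytes));
}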
758 | 758 | ||
759 | /* | 759 | /* |
760 | * A data_ready event on a listening socket means there's a connection | 760 | * A data_ready event on a listening socket means there's a connection |
761 | * pending. Do not use state_change as a substitute for it. | 761 | * pending. Do not use state_change as a substitute for it. |
762 | */ | 762 | */ |
763 | static void | 763 | static void |
764 | svc_tcp_listen_data_ready(struct sock *sk, int count_unused) | 764 | svc_tcp_listen_data_ready(struct sock *sk, int count_unused) |
765 | { | 765 | { |
766 | struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; | 766 | struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; |
767 | 767 | ||
768 | dprintk("svc: socket %p TCP (listen) state change %d\n", | 768 | dprintk("svc: socket %p TCP (listen) state change %d\n", |
769 | sk, sk->sk_state); | 769 | sk, sk->sk_state); |
770 | 770 | ||
771 | /* | 771 | /* |
772 | * This callback may be called twice when a new connection | 772 | * This callback may be called twice when a new connection |
773 | * is established as a child socket inherits everything | 773 | * is established as a child socket inherits everything |
774 | * from a parent LISTEN socket. | 774 | * from a parent LISTEN socket. |
775 | * 1) data_ready method of the parent socket will be called | 775 | * 1) data_ready method of the parent socket will be called |
776 | * when one of the child sockets becomes ESTABLISHED. | 776 | * when one of the child sockets becomes ESTABLISHED. |
777 | * 2) data_ready method of the child socket may be called | 777 | * 2) data_ready method of the child socket may be called |
778 | * when it receives data before the socket is accepted. | 778 | * when it receives data before the socket is accepted. |
779 | * In case 2, we should ignore it silently. | 779 | * In case 2, we should ignore it silently. |
780 | */ | 780 | */ |
781 | if (sk->sk_state == TCP_LISTEN) { | 781 | if (sk->sk_state == TCP_LISTEN) { |
782 | if (svsk) { | 782 | if (svsk) { |
783 | set_bit(SK_CONN, &svsk->sk_flags); | 783 | set_bit(SK_CONN, &svsk->sk_flags); |
784 | svc_sock_enqueue(svsk); | 784 | svc_sock_enqueue(svsk); |
785 | } else | 785 | } else |
786 | printk("svc: socket %p: no user data\n", sk); | 786 | printk("svc: socket %p: no user data\n", sk); |
787 | } | 787 | } |
788 | 788 | ||
789 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) | 789 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) |
790 | wake_up_interruptible_all(sk->sk_sleep); | 790 | wake_up_interruptible_all(sk->sk_sleep); |
791 | } | 791 | } |
792 | 792 | ||
793 | /* | 793 | /* |
794 | * A state change on a connected socket means it's dying or dead. | 794 | * A state change on a connected socket means it's dying or dead. |
795 | */ | 795 | */ |
796 | static void | 796 | static void |
797 | svc_tcp_state_change(struct sock *sk) | 797 | svc_tcp_state_change(struct sock *sk) |
798 | { | 798 | { |
799 | struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; | 799 | struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; |
800 | 800 | ||
801 | dprintk("svc: socket %p TCP (connected) state change %d (svsk %p)\n", | 801 | dprintk("svc: socket %p TCP (connected) state change %d (svsk %p)\n", |
802 | sk, sk->sk_state, sk->sk_user_data); | 802 | sk, sk->sk_state, sk->sk_user_data); |
803 | 803 | ||
804 | if (!svsk) | 804 | if (!svsk) |
805 | printk("svc: socket %p: no user data\n", sk); | 805 | printk("svc: socket %p: no user data\n", sk); |
806 | else { | 806 | else { |
807 | set_bit(SK_CLOSE, &svsk->sk_flags); | 807 | set_bit(SK_CLOSE, &svsk->sk_flags); |
808 | svc_sock_enqueue(svsk); | 808 | svc_sock_enqueue(svsk); |
809 | } | 809 | } |
810 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) | 810 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) |
811 | wake_up_interruptible_all(sk->sk_sleep); | 811 | wake_up_interruptible_all(sk->sk_sleep); |
812 | } | 812 | } |
813 | 813 | ||
814 | static void | 814 | static void |
815 | svc_tcp_data_ready(struct sock *sk, int count) | 815 | svc_tcp_data_ready(struct sock *sk, int count) |
816 | { | 816 | { |
817 | struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; | 817 | struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; |
818 | 818 | ||
819 | dprintk("svc: socket %p TCP data ready (svsk %p)\n", | 819 | dprintk("svc: socket %p TCP data ready (svsk %p)\n", |
820 | sk, sk->sk_user_data); | 820 | sk, sk->sk_user_data); |
821 | if (svsk) { | 821 | if (svsk) { |
822 | set_bit(SK_DATA, &svsk->sk_flags); | 822 | set_bit(SK_DATA, &svsk->sk_flags); |
823 | svc_sock_enqueue(svsk); | 823 | svc_sock_enqueue(svsk); |
824 | } | 824 | } |
825 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) | 825 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) |
826 | wake_up_interruptible(sk->sk_sleep); | 826 | wake_up_interruptible(sk->sk_sleep); |
827 | } | 827 | } |
828 | 828 | ||
829 | /* | 829 | /* |
830 | * Accept a TCP connection | 830 | * Accept a TCP connection |
831 | */ | 831 | */ |
832 | static void | 832 | static void |
833 | svc_tcp_accept(struct svc_sock *svsk) | 833 | svc_tcp_accept(struct svc_sock *svsk) |
834 | { | 834 | { |
835 | struct sockaddr_in sin; | 835 | struct sockaddr_in sin; |
836 | struct svc_serv *serv = svsk->sk_server; | 836 | struct svc_serv *serv = svsk->sk_server; |
837 | struct socket *sock = svsk->sk_sock; | 837 | struct socket *sock = svsk->sk_sock; |
838 | struct socket *newsock; | 838 | struct socket *newsock; |
839 | struct svc_sock *newsvsk; | 839 | struct svc_sock *newsvsk; |
840 | int err, slen; | 840 | int err, slen; |
841 | 841 | ||
842 | dprintk("svc: tcp_accept %p sock %p\n", svsk, sock); | 842 | dprintk("svc: tcp_accept %p sock %p\n", svsk, sock); |
843 | if (!sock) | 843 | if (!sock) |
844 | return; | 844 | return; |
845 | 845 | ||
846 | clear_bit(SK_CONN, &svsk->sk_flags); | 846 | clear_bit(SK_CONN, &svsk->sk_flags); |
847 | err = kernel_accept(sock, &newsock, O_NONBLOCK); | 847 | err = kernel_accept(sock, &newsock, O_NONBLOCK); |
848 | if (err < 0) { | 848 | if (err < 0) { |
849 | if (err == -ENOMEM) | 849 | if (err == -ENOMEM) |
850 | printk(KERN_WARNING "%s: no more sockets!\n", | 850 | printk(KERN_WARNING "%s: no more sockets!\n", |
851 | serv->sv_name); | 851 | serv->sv_name); |
852 | else if (err != -EAGAIN && net_ratelimit()) | 852 | else if (err != -EAGAIN && net_ratelimit()) |
853 | printk(KERN_WARNING "%s: accept failed (err %d)!\n", | 853 | printk(KERN_WARNING "%s: accept failed (err %d)!\n", |
854 | serv->sv_name, -err); | 854 | serv->sv_name, -err); |
855 | return; | 855 | return; |
856 | } | 856 | } |
857 | 857 | ||
858 | set_bit(SK_CONN, &svsk->sk_flags); | 858 | set_bit(SK_CONN, &svsk->sk_flags); |
859 | svc_sock_enqueue(svsk); | 859 | svc_sock_enqueue(svsk); |
860 | 860 | ||
861 | slen = sizeof(sin); | 861 | slen = sizeof(sin); |
862 | err = kernel_getpeername(newsock, (struct sockaddr *) &sin, &slen); | 862 | err = kernel_getpeername(newsock, (struct sockaddr *) &sin, &slen); |
863 | if (err < 0) { | 863 | if (err < 0) { |
864 | if (net_ratelimit()) | 864 | if (net_ratelimit()) |
865 | printk(KERN_WARNING "%s: peername failed (err %d)!\n", | 865 | printk(KERN_WARNING "%s: peername failed (err %d)!\n", |
866 | serv->sv_name, -err); | 866 | serv->sv_name, -err); |
867 | goto failed; /* aborted connection or whatever */ | 867 | goto failed; /* aborted connection or whatever */ |
868 | } | 868 | } |
869 | 869 | ||
870 | /* Ideally, we would want to reject connections from unauthorized | 870 | /* Ideally, we would want to reject connections from unauthorized |
871 | * hosts here, but when we get encryption, the IP of the host won't | 871 | * hosts here, but when we get encryption, the IP of the host won't |
872 | * tell us anything. For now just warn about unprivileged connections. | 872 | * tell us anything. For now just warn about unprivileged connections. |
873 | */ | 873 | */ |
874 | if (ntohs(sin.sin_port) >= 1024) { | 874 | if (ntohs(sin.sin_port) >= 1024) { |
875 | dprintk(KERN_WARNING | 875 | dprintk(KERN_WARNING |
876 | "%s: connect from unprivileged port: %u.%u.%u.%u:%d\n", | 876 | "%s: connect from unprivileged port: %u.%u.%u.%u:%d\n", |
877 | serv->sv_name, | 877 | serv->sv_name, |
878 | NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port)); | 878 | NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port)); |
879 | } | 879 | } |
880 | 880 | ||
881 | dprintk("%s: connect from %u.%u.%u.%u:%04x\n", serv->sv_name, | 881 | dprintk("%s: connect from %u.%u.%u.%u:%04x\n", serv->sv_name, |
882 | NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port)); | 882 | NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port)); |
883 | 883 | ||
884 | /* make sure that a write doesn't block forever when | 884 | /* make sure that a write doesn't block forever when |
885 | * low on memory | 885 | * low on memory |
886 | */ | 886 | */ |
887 | newsock->sk->sk_sndtimeo = HZ*30; | 887 | newsock->sk->sk_sndtimeo = HZ*30; |
888 | 888 | ||
889 | if (!(newsvsk = svc_setup_socket(serv, newsock, &err, 0))) | 889 | if (!(newsvsk = svc_setup_socket(serv, newsock, &err, 0))) |
890 | goto failed; | 890 | goto failed; |
891 | 891 | ||
892 | 892 | ||
893 | /* make sure that we don't have too many active connections. | 893 | /* make sure that we don't have too many active connections. |
894 | * If we do, something must be dropped. | 894 | * If we do, something must be dropped. |
895 | * | 895 | * |
896 | * There's no point in trying to do random drop here for | 896 | * There's no point in trying to do random drop here for |
897 | * DoS prevention. An NFS client does 1 reconnect in 15 | 897 | * DoS prevention. An NFS client does 1 reconnect in 15 |
898 | * seconds. An attacker can easily beat that. | 898 | * seconds. An attacker can easily beat that. |
899 | * | 899 | * |
900 | * The only somewhat efficient mechanism would be to drop | 900 | * The only somewhat efficient mechanism would be to drop |
901 | * old connections from the same IP first. But right now | 901 | * old connections from the same IP first. But right now |
902 | * we don't even record the client IP in svc_sock. | 902 | * we don't even record the client IP in svc_sock. |
903 | */ | 903 | */ |
904 | if (serv->sv_tmpcnt > (serv->sv_nrthreads+3)*20) { | 904 | if (serv->sv_tmpcnt > (serv->sv_nrthreads+3)*20) { |
905 | struct svc_sock *svsk = NULL; | 905 | struct svc_sock *svsk = NULL; |
906 | spin_lock_bh(&serv->sv_lock); | 906 | spin_lock_bh(&serv->sv_lock); |
907 | if (!list_empty(&serv->sv_tempsocks)) { | 907 | if (!list_empty(&serv->sv_tempsocks)) { |
908 | if (net_ratelimit()) { | 908 | if (net_ratelimit()) { |
909 | /* Try to help the admin */ | 909 | /* Try to help the admin */ |
910 | printk(KERN_NOTICE "%s: too many open TCP " | 910 | printk(KERN_NOTICE "%s: too many open TCP " |
911 | "sockets, consider increasing the " | 911 | "sockets, consider increasing the " |
912 | "number of nfsd threads\n", | 912 | "number of nfsd threads\n", |
913 | serv->sv_name); | 913 | serv->sv_name); |
914 | printk(KERN_NOTICE "%s: last TCP connect from " | 914 | printk(KERN_NOTICE "%s: last TCP connect from " |
915 | "%u.%u.%u.%u:%d\n", | 915 | "%u.%u.%u.%u:%d\n", |
916 | serv->sv_name, | 916 | serv->sv_name, |
917 | NIPQUAD(sin.sin_addr.s_addr), | 917 | NIPQUAD(sin.sin_addr.s_addr), |
918 | ntohs(sin.sin_port)); | 918 | ntohs(sin.sin_port)); |
919 | } | 919 | } |
920 | /* | 920 | /* |
921 | * Always select the oldest socket. It's not fair, | 921 | * Always select the oldest socket. It's not fair, |
922 | * but so is life | 922 | * but so is life |
923 | */ | 923 | */ |
924 | svsk = list_entry(serv->sv_tempsocks.prev, | 924 | svsk = list_entry(serv->sv_tempsocks.prev, |
925 | struct svc_sock, | 925 | struct svc_sock, |
926 | sk_list); | 926 | sk_list); |
927 | set_bit(SK_CLOSE, &svsk->sk_flags); | 927 | set_bit(SK_CLOSE, &svsk->sk_flags); |
928 | atomic_inc(&svsk->sk_inuse); | 928 | atomic_inc(&svsk->sk_inuse); |
929 | } | 929 | } |
930 | spin_unlock_bh(&serv->sv_lock); | 930 | spin_unlock_bh(&serv->sv_lock); |
931 | 931 | ||
932 | if (svsk) { | 932 | if (svsk) { |
933 | svc_sock_enqueue(svsk); | 933 | svc_sock_enqueue(svsk); |
934 | svc_sock_put(svsk); | 934 | svc_sock_put(svsk); |
935 | } | 935 | } |
936 | 936 | ||
937 | } | 937 | } |
938 | 938 | ||
939 | if (serv->sv_stats) | 939 | if (serv->sv_stats) |
940 | serv->sv_stats->nettcpconn++; | 940 | serv->sv_stats->nettcpconn++; |
941 | 941 | ||
942 | return; | 942 | return; |
943 | 943 | ||
944 | failed: | 944 | failed: |
945 | sock_release(newsock); | 945 | sock_release(newsock); |
946 | return; | 946 | return; |
947 | } | 947 | } |
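
The overload check in svc_tcp_accept() is deliberately blunt: once more than (sv_nrthreads + 3) * 20 temporary sockets exist, the oldest one is marked for closing, since, as the comment notes, random drop buys nothing against clients that reconnect every 15 seconds. The threshold in isolation (a trivial sketch; the function name is illustrative):

/* True when the temporary-socket count exceeds 20 connections per
 * thread, using the same +3 headroom as the buffer sizing above. */
static int too_many_connections(unsigned int tmpcnt, unsigned int nr_threads)
{
	return tmpcnt > (nr_threads + 3) * 20;
}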
948 | 948 | ||
949 | /* | 949 | /* |
950 | * Receive data from a TCP socket. | 950 | * Receive data from a TCP socket. |
951 | */ | 951 | */ |
952 | static int | 952 | static int |
953 | svc_tcp_recvfrom(struct svc_rqst *rqstp) | 953 | svc_tcp_recvfrom(struct svc_rqst *rqstp) |
954 | { | 954 | { |
955 | struct svc_sock *svsk = rqstp->rq_sock; | 955 | struct svc_sock *svsk = rqstp->rq_sock; |
956 | struct svc_serv *serv = svsk->sk_server; | 956 | struct svc_serv *serv = svsk->sk_server; |
957 | int len; | 957 | int len; |
958 | struct kvec *vec; | 958 | struct kvec *vec; |
959 | int pnum, vlen; | 959 | int pnum, vlen; |
960 | 960 | ||
961 | dprintk("svc: tcp_recv %p data %d conn %d close %d\n", | 961 | dprintk("svc: tcp_recv %p data %d conn %d close %d\n", |
962 | svsk, test_bit(SK_DATA, &svsk->sk_flags), | 962 | svsk, test_bit(SK_DATA, &svsk->sk_flags), |
963 | test_bit(SK_CONN, &svsk->sk_flags), | 963 | test_bit(SK_CONN, &svsk->sk_flags), |
964 | test_bit(SK_CLOSE, &svsk->sk_flags)); | 964 | test_bit(SK_CLOSE, &svsk->sk_flags)); |
965 | 965 | ||
966 | if ((rqstp->rq_deferred = svc_deferred_dequeue(svsk))) { | 966 | if ((rqstp->rq_deferred = svc_deferred_dequeue(svsk))) { |
967 | svc_sock_received(svsk); | 967 | svc_sock_received(svsk); |
968 | return svc_deferred_recv(rqstp); | 968 | return svc_deferred_recv(rqstp); |
969 | } | 969 | } |
970 | 970 | ||
971 | if (test_bit(SK_CLOSE, &svsk->sk_flags)) { | 971 | if (test_bit(SK_CLOSE, &svsk->sk_flags)) { |
972 | svc_delete_socket(svsk); | 972 | svc_delete_socket(svsk); |
973 | return 0; | 973 | return 0; |
974 | } | 974 | } |
975 | 975 | ||
976 | if (test_bit(SK_CONN, &svsk->sk_flags)) { | 976 | if (test_bit(SK_CONN, &svsk->sk_flags)) { |
977 | svc_tcp_accept(svsk); | 977 | svc_tcp_accept(svsk); |
978 | svc_sock_received(svsk); | 978 | svc_sock_received(svsk); |
979 | return 0; | 979 | return 0; |
980 | } | 980 | } |
981 | 981 | ||
982 | if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags)) | 982 | if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags)) |
983 | /* sndbuf needs to have room for one request | 983 | /* sndbuf needs to have room for one request |
984 | * per thread, otherwise we can stall even when the | 984 | * per thread, otherwise we can stall even when the |
985 | * network isn't a bottleneck. | 985 | * network isn't a bottleneck. |
986 | * | 986 | * |
987 | * We count all threads rather than threads in a | 987 | * We count all threads rather than threads in a |
988 | * particular pool, which provides an upper bound | 988 | * particular pool, which provides an upper bound |
989 | * on the number of threads which will access the socket. | 989 | * on the number of threads which will access the socket. |
990 | * | 990 | * |
991 | * rcvbuf just needs to be able to hold a few requests. | 991 | * rcvbuf just needs to be able to hold a few requests. |
992 | * Normally they will be removed from the queue | 992 | * Normally they will be removed from the queue |
993 | * as soon as a complete request arrives. | 993 | * as soon as a complete request arrives. |
994 | */ | 994 | */ |
995 | svc_sock_setbufsize(svsk->sk_sock, | 995 | svc_sock_setbufsize(svsk->sk_sock, |
996 | (serv->sv_nrthreads+3) * serv->sv_bufsz, | 996 | (serv->sv_nrthreads+3) * serv->sv_bufsz, |
997 | 3 * serv->sv_bufsz); | 997 | 3 * serv->sv_bufsz); |
998 | 998 | ||
999 | clear_bit(SK_DATA, &svsk->sk_flags); | 999 | clear_bit(SK_DATA, &svsk->sk_flags); |
1000 | 1000 | ||
1001 | /* Receive data. If we haven't got the record length yet, get | 1001 | /* Receive data. If we haven't got the record length yet, get |
1002 | * the next four bytes. Otherwise try to gobble up as much as | 1002 | * the next four bytes. Otherwise try to gobble up as much as |
1003 | * possible up to the complete record length. | 1003 | * possible up to the complete record length. |
1004 | */ | 1004 | */ |
1005 | if (svsk->sk_tcplen < 4) { | 1005 | if (svsk->sk_tcplen < 4) { |
1006 | unsigned long want = 4 - svsk->sk_tcplen; | 1006 | unsigned long want = 4 - svsk->sk_tcplen; |
1007 | struct kvec iov; | 1007 | struct kvec iov; |
1008 | 1008 | ||
1009 | iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen; | 1009 | iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen; |
1010 | iov.iov_len = want; | 1010 | iov.iov_len = want; |
1011 | if ((len = svc_recvfrom(rqstp, &iov, 1, want)) < 0) | 1011 | if ((len = svc_recvfrom(rqstp, &iov, 1, want)) < 0) |
1012 | goto error; | 1012 | goto error; |
1013 | svsk->sk_tcplen += len; | 1013 | svsk->sk_tcplen += len; |
1014 | 1014 | ||
1015 | if (len < want) { | 1015 | if (len < want) { |
1016 | dprintk("svc: short recvfrom while reading record length (%d of %lu)\n", | 1016 | dprintk("svc: short recvfrom while reading record length (%d of %lu)\n", |
1017 | len, want); | 1017 | len, want); |
1018 | svc_sock_received(svsk); | 1018 | svc_sock_received(svsk); |
1019 | return -EAGAIN; /* record header not complete */ | 1019 | return -EAGAIN; /* record header not complete */ |
1020 | } | 1020 | } |
1021 | 1021 | ||
1022 | svsk->sk_reclen = ntohl(svsk->sk_reclen); | 1022 | svsk->sk_reclen = ntohl(svsk->sk_reclen); |
1023 | if (!(svsk->sk_reclen & 0x80000000)) { | 1023 | if (!(svsk->sk_reclen & 0x80000000)) { |
1024 | /* FIXME: technically, a record can be fragmented, | 1024 | /* FIXME: technically, a record can be fragmented, |
1025 | * and non-terminal fragments will not have the top | 1025 | * and non-terminal fragments will not have the top |
1026 | * bit set in the fragment length header. | 1026 | * bit set in the fragment length header. |
1027 | * But apparently no known nfs clients send fragmented | 1027 | * But apparently no known nfs clients send fragmented |
1028 | * records. */ | 1028 | * records. */ |
1029 | printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx (non-terminal)\n", | 1029 | printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx (non-terminal)\n", |
1030 | (unsigned long) svsk->sk_reclen); | 1030 | (unsigned long) svsk->sk_reclen); |
1031 | goto err_delete; | 1031 | goto err_delete; |
1032 | } | 1032 | } |
1033 | svsk->sk_reclen &= 0x7fffffff; | 1033 | svsk->sk_reclen &= 0x7fffffff; |
1034 | dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen); | 1034 | dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen); |
1035 | if (svsk->sk_reclen > serv->sv_bufsz) { | 1035 | if (svsk->sk_reclen > serv->sv_bufsz) { |
1036 | printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx (large)\n", | 1036 | printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx (large)\n", |
1037 | (unsigned long) svsk->sk_reclen); | 1037 | (unsigned long) svsk->sk_reclen); |
1038 | goto err_delete; | 1038 | goto err_delete; |
1039 | } | 1039 | } |
1040 | } | 1040 | } |
1041 | 1041 | ||
1042 | /* Check whether enough data is available */ | 1042 | /* Check whether enough data is available */ |
1043 | len = svc_recv_available(svsk); | 1043 | len = svc_recv_available(svsk); |
1044 | if (len < 0) | 1044 | if (len < 0) |
1045 | goto error; | 1045 | goto error; |
1046 | 1046 | ||
1047 | if (len < svsk->sk_reclen) { | 1047 | if (len < svsk->sk_reclen) { |
1048 | dprintk("svc: incomplete TCP record (%d of %d)\n", | 1048 | dprintk("svc: incomplete TCP record (%d of %d)\n", |
1049 | len, svsk->sk_reclen); | 1049 | len, svsk->sk_reclen); |
1050 | svc_sock_received(svsk); | 1050 | svc_sock_received(svsk); |
1051 | return -EAGAIN; /* record not complete */ | 1051 | return -EAGAIN; /* record not complete */ |
1052 | } | 1052 | } |
1053 | len = svsk->sk_reclen; | 1053 | len = svsk->sk_reclen; |
1054 | set_bit(SK_DATA, &svsk->sk_flags); | 1054 | set_bit(SK_DATA, &svsk->sk_flags); |
1055 | 1055 | ||
1056 | vec = rqstp->rq_vec; | 1056 | vec = rqstp->rq_vec; |
1057 | vec[0] = rqstp->rq_arg.head[0]; | 1057 | vec[0] = rqstp->rq_arg.head[0]; |
1058 | vlen = PAGE_SIZE; | 1058 | vlen = PAGE_SIZE; |
1059 | pnum = 1; | 1059 | pnum = 1; |
1060 | while (vlen < len) { | 1060 | while (vlen < len) { |
1061 | vec[pnum].iov_base = page_address(rqstp->rq_pages[pnum]); | 1061 | vec[pnum].iov_base = page_address(rqstp->rq_pages[pnum]); |
1062 | vec[pnum].iov_len = PAGE_SIZE; | 1062 | vec[pnum].iov_len = PAGE_SIZE; |
1063 | pnum++; | 1063 | pnum++; |
1064 | vlen += PAGE_SIZE; | 1064 | vlen += PAGE_SIZE; |
1065 | } | 1065 | } |
1066 | rqstp->rq_respages = &rqstp->rq_pages[pnum]; | 1066 | rqstp->rq_respages = &rqstp->rq_pages[pnum]; |
1067 | 1067 | ||
1068 | /* Now receive data */ | 1068 | /* Now receive data */ |
1069 | len = svc_recvfrom(rqstp, vec, pnum, len); | 1069 | len = svc_recvfrom(rqstp, vec, pnum, len); |
1070 | if (len < 0) | 1070 | if (len < 0) |
1071 | goto error; | 1071 | goto error; |
1072 | 1072 | ||
1073 | dprintk("svc: TCP complete record (%d bytes)\n", len); | 1073 | dprintk("svc: TCP complete record (%d bytes)\n", len); |
1074 | rqstp->rq_arg.len = len; | 1074 | rqstp->rq_arg.len = len; |
1075 | rqstp->rq_arg.page_base = 0; | 1075 | rqstp->rq_arg.page_base = 0; |
1076 | if (len <= rqstp->rq_arg.head[0].iov_len) { | 1076 | if (len <= rqstp->rq_arg.head[0].iov_len) { |
1077 | rqstp->rq_arg.head[0].iov_len = len; | 1077 | rqstp->rq_arg.head[0].iov_len = len; |
1078 | rqstp->rq_arg.page_len = 0; | 1078 | rqstp->rq_arg.page_len = 0; |
1079 | } else { | 1079 | } else { |
1080 | rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len; | 1080 | rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len; |
1081 | } | 1081 | } |
1082 | 1082 | ||
1083 | rqstp->rq_skbuff = NULL; | 1083 | rqstp->rq_skbuff = NULL; |
1084 | rqstp->rq_prot = IPPROTO_TCP; | 1084 | rqstp->rq_prot = IPPROTO_TCP; |
1085 | 1085 | ||
1086 | /* Reset TCP read info */ | 1086 | /* Reset TCP read info */ |
1087 | svsk->sk_reclen = 0; | 1087 | svsk->sk_reclen = 0; |
1088 | svsk->sk_tcplen = 0; | 1088 | svsk->sk_tcplen = 0; |
1089 | 1089 | ||
1090 | svc_sock_received(svsk); | 1090 | svc_sock_received(svsk); |
1091 | if (serv->sv_stats) | 1091 | if (serv->sv_stats) |
1092 | serv->sv_stats->nettcpcnt++; | 1092 | serv->sv_stats->nettcpcnt++; |
1093 | 1093 | ||
1094 | return len; | 1094 | return len; |
1095 | 1095 | ||
1096 | err_delete: | 1096 | err_delete: |
1097 | svc_delete_socket(svsk); | 1097 | svc_delete_socket(svsk); |
1098 | return -EAGAIN; | 1098 | return -EAGAIN; |
1099 | 1099 | ||
1100 | error: | 1100 | error: |
1101 | if (len == -EAGAIN) { | 1101 | if (len == -EAGAIN) { |
1102 | dprintk("RPC: TCP recvfrom got EAGAIN\n"); | 1102 | dprintk("RPC: TCP recvfrom got EAGAIN\n"); |
1103 | svc_sock_received(svsk); | 1103 | svc_sock_received(svsk); |
1104 | } else { | 1104 | } else { |
1105 | printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", | 1105 | printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", |
1106 | svsk->sk_server->sv_name, -len); | 1106 | svsk->sk_server->sv_name, -len); |
1107 | goto err_delete; | 1107 | goto err_delete; |
1108 | } | 1108 | } |
1109 | 1109 | ||
1110 | return len; | 1110 | return len; |
1111 | } | 1111 | } |
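
The four bytes parsed at the top of svc_tcp_recvfrom() are the RPC record marker of RFC 1831: a big-endian word whose top bit flags the last fragment and whose low 31 bits carry the fragment length. A self-contained sketch of the decode the function performs inline (the helper name is illustrative):

#include <stdint.h>
#include <arpa/inet.h>

/* Decode an RPC-over-TCP record marker.  Stores the fragment length
 * and returns 0 on success; returns -1 for a non-terminal fragment,
 * which the server above rejects outright. */
static int decode_record_marker(uint32_t wire, uint32_t *len)
{
	uint32_t host = ntohl(wire);	/* marker is big-endian on the wire */

	if (!(host & 0x80000000))
		return -1;		/* last-fragment bit not set */
	*len = host & 0x7fffffff;	/* low 31 bits: fragment length */
	return 0;
}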
1112 | 1112 | ||
1113 | /* | 1113 | /* |
1114 | * Send out data on TCP socket. | 1114 | * Send out data on TCP socket. |
1115 | */ | 1115 | */ |
1116 | static int | 1116 | static int |
1117 | svc_tcp_sendto(struct svc_rqst *rqstp) | 1117 | svc_tcp_sendto(struct svc_rqst *rqstp) |
1118 | { | 1118 | { |
1119 | struct xdr_buf *xbufp = &rqstp->rq_res; | 1119 | struct xdr_buf *xbufp = &rqstp->rq_res; |
1120 | int sent; | 1120 | int sent; |
1121 | __be32 reclen; | 1121 | __be32 reclen; |
1122 | 1122 | ||
1123 | /* Set up the first element of the reply kvec. | 1123 | /* Set up the first element of the reply kvec. |
1124 | * Any other kvecs that may be in use have been taken | 1124 | * Any other kvecs that may be in use have been taken |
1125 | * care of by the server implementation itself. | 1125 | * care of by the server implementation itself. |
1126 | */ | 1126 | */ |
1127 | reclen = htonl(0x80000000|((xbufp->len ) - 4)); | 1127 | reclen = htonl(0x80000000|((xbufp->len ) - 4)); |
1128 | memcpy(xbufp->head[0].iov_base, &reclen, 4); | 1128 | memcpy(xbufp->head[0].iov_base, &reclen, 4); |
1129 | 1129 | ||
1130 | if (test_bit(SK_DEAD, &rqstp->rq_sock->sk_flags)) | 1130 | if (test_bit(SK_DEAD, &rqstp->rq_sock->sk_flags)) |
1131 | return -ENOTCONN; | 1131 | return -ENOTCONN; |
1132 | 1132 | ||
1133 | sent = svc_sendto(rqstp, &rqstp->rq_res); | 1133 | sent = svc_sendto(rqstp, &rqstp->rq_res); |
1134 | if (sent != xbufp->len) { | 1134 | if (sent != xbufp->len) { |
1135 | printk(KERN_NOTICE "rpc-srv/tcp: %s: %s %d when sending %d bytes - shutting down socket\n", | 1135 | printk(KERN_NOTICE "rpc-srv/tcp: %s: %s %d when sending %d bytes - shutting down socket\n", |
1136 | rqstp->rq_sock->sk_server->sv_name, | 1136 | rqstp->rq_sock->sk_server->sv_name, |
1137 | (sent<0)?"got error":"sent only", | 1137 | (sent<0)?"got error":"sent only", |
1138 | sent, xbufp->len); | 1138 | sent, xbufp->len); |
1139 | svc_delete_socket(rqstp->rq_sock); | 1139 | svc_delete_socket(rqstp->rq_sock); |
1140 | sent = -EAGAIN; | 1140 | sent = -EAGAIN; |
1141 | } | 1141 | } |
1142 | return sent; | 1142 | return sent; |
1143 | } | 1143 | } |
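
svc_tcp_sendto() builds the same marker in the other direction, overwriting the first four bytes of the reply with the length of everything that follows and the last-fragment bit set. The encode step in isolation (a sketch; reply_len counts the whole buffer including the 4 marker bytes, matching xbufp->len - 4 above):

#include <stdint.h>
#include <arpa/inet.h>

/* Build the record marker for a reply of reply_len bytes, where the
 * first 4 bytes of the buffer are reserved for the marker itself. */
static uint32_t encode_record_marker(uint32_t reply_len)
{
	return htonl(0x80000000 | (reply_len - 4));
}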
1144 | 1144 | ||
1145 | static void | 1145 | static void |
1146 | svc_tcp_init(struct svc_sock *svsk) | 1146 | svc_tcp_init(struct svc_sock *svsk) |
1147 | { | 1147 | { |
1148 | struct sock *sk = svsk->sk_sk; | 1148 | struct sock *sk = svsk->sk_sk; |
1149 | struct tcp_sock *tp = tcp_sk(sk); | 1149 | struct tcp_sock *tp = tcp_sk(sk); |
1150 | 1150 | ||
1151 | svsk->sk_recvfrom = svc_tcp_recvfrom; | 1151 | svsk->sk_recvfrom = svc_tcp_recvfrom; |
1152 | svsk->sk_sendto = svc_tcp_sendto; | 1152 | svsk->sk_sendto = svc_tcp_sendto; |
1153 | 1153 | ||
1154 | if (sk->sk_state == TCP_LISTEN) { | 1154 | if (sk->sk_state == TCP_LISTEN) { |
1155 | dprintk("setting up TCP socket for listening\n"); | 1155 | dprintk("setting up TCP socket for listening\n"); |
1156 | sk->sk_data_ready = svc_tcp_listen_data_ready; | 1156 | sk->sk_data_ready = svc_tcp_listen_data_ready; |
1157 | set_bit(SK_CONN, &svsk->sk_flags); | 1157 | set_bit(SK_CONN, &svsk->sk_flags); |
1158 | } else { | 1158 | } else { |
1159 | dprintk("setting up TCP socket for reading\n"); | 1159 | dprintk("setting up TCP socket for reading\n"); |
1160 | sk->sk_state_change = svc_tcp_state_change; | 1160 | sk->sk_state_change = svc_tcp_state_change; |
1161 | sk->sk_data_ready = svc_tcp_data_ready; | 1161 | sk->sk_data_ready = svc_tcp_data_ready; |
1162 | sk->sk_write_space = svc_write_space; | 1162 | sk->sk_write_space = svc_write_space; |
1163 | 1163 | ||
1164 | svsk->sk_reclen = 0; | 1164 | svsk->sk_reclen = 0; |
1165 | svsk->sk_tcplen = 0; | 1165 | svsk->sk_tcplen = 0; |
1166 | 1166 | ||
1167 | tp->nonagle = 1; /* disable Nagle's algorithm */ | 1167 | tp->nonagle = 1; /* disable Nagle's algorithm */ |
1168 | 1168 | ||
1169 | /* initial setting: must have enough space to | 1169 | /* initial setting: must have enough space to |
1170 | * receive and respond to one request. | 1170 | * receive and respond to one request. |
1171 | * svc_tcp_recvfrom will re-adjust if necessary | 1171 | * svc_tcp_recvfrom will re-adjust if necessary |
1172 | */ | 1172 | */ |
1173 | svc_sock_setbufsize(svsk->sk_sock, | 1173 | svc_sock_setbufsize(svsk->sk_sock, |
1174 | 3 * svsk->sk_server->sv_bufsz, | 1174 | 3 * svsk->sk_server->sv_bufsz, |
1175 | 3 * svsk->sk_server->sv_bufsz); | 1175 | 3 * svsk->sk_server->sv_bufsz); |
1176 | 1176 | ||
1177 | set_bit(SK_CHNGBUF, &svsk->sk_flags); | 1177 | set_bit(SK_CHNGBUF, &svsk->sk_flags); |
1178 | set_bit(SK_DATA, &svsk->sk_flags); | 1178 | set_bit(SK_DATA, &svsk->sk_flags); |
1179 | if (sk->sk_state != TCP_ESTABLISHED) | 1179 | if (sk->sk_state != TCP_ESTABLISHED) |
1180 | set_bit(SK_CLOSE, &svsk->sk_flags); | 1180 | set_bit(SK_CLOSE, &svsk->sk_flags); |
1181 | } | 1181 | } |
1182 | } | 1182 | } |
1183 | 1183 | ||
1184 | void | 1184 | void |
1185 | svc_sock_update_bufs(struct svc_serv *serv) | 1185 | svc_sock_update_bufs(struct svc_serv *serv) |
1186 | { | 1186 | { |
1187 | /* | 1187 | /* |
1188 | * The number of server threads has changed. Update | 1188 | * The number of server threads has changed. Update |
1189 | * rcvbuf and sndbuf accordingly on all sockets | 1189 | * rcvbuf and sndbuf accordingly on all sockets |
1190 | */ | 1190 | */ |
1191 | struct list_head *le; | 1191 | struct list_head *le; |
1192 | 1192 | ||
1193 | spin_lock_bh(&serv->sv_lock); | 1193 | spin_lock_bh(&serv->sv_lock); |
1194 | list_for_each(le, &serv->sv_permsocks) { | 1194 | list_for_each(le, &serv->sv_permsocks) { |
1195 | struct svc_sock *svsk = | 1195 | struct svc_sock *svsk = |
1196 | list_entry(le, struct svc_sock, sk_list); | 1196 | list_entry(le, struct svc_sock, sk_list); |
1197 | set_bit(SK_CHNGBUF, &svsk->sk_flags); | 1197 | set_bit(SK_CHNGBUF, &svsk->sk_flags); |
1198 | } | 1198 | } |
1199 | list_for_each(le, &serv->sv_tempsocks) { | 1199 | list_for_each(le, &serv->sv_tempsocks) { |
1200 | struct svc_sock *svsk = | 1200 | struct svc_sock *svsk = |
1201 | list_entry(le, struct svc_sock, sk_list); | 1201 | list_entry(le, struct svc_sock, sk_list); |
1202 | set_bit(SK_CHNGBUF, &svsk->sk_flags); | 1202 | set_bit(SK_CHNGBUF, &svsk->sk_flags); |
1203 | } | 1203 | } |
1204 | spin_unlock_bh(&serv->sv_lock); | 1204 | spin_unlock_bh(&serv->sv_lock); |
1205 | } | 1205 | } |
1206 | 1206 | ||
1207 | /* | 1207 | /* |
1208 | * Receive the next request on any socket. This code is carefully | 1208 | * Receive the next request on any socket. This code is carefully |
1209 | * organised not to touch any cachelines in the shared svc_serv | 1209 | * organised not to touch any cachelines in the shared svc_serv |
1210 | * structure, only cachelines in the local svc_pool. | 1210 | * structure, only cachelines in the local svc_pool. |
1211 | */ | 1211 | */ |
1212 | int | 1212 | int |
1213 | svc_recv(struct svc_rqst *rqstp, long timeout) | 1213 | svc_recv(struct svc_rqst *rqstp, long timeout) |
1214 | { | 1214 | { |
1215 | struct svc_sock *svsk = NULL; | 1215 | struct svc_sock *svsk = NULL; |
1216 | struct svc_serv *serv = rqstp->rq_server; | 1216 | struct svc_serv *serv = rqstp->rq_server; |
1217 | struct svc_pool *pool = rqstp->rq_pool; | 1217 | struct svc_pool *pool = rqstp->rq_pool; |
1218 | int len, i; | 1218 | int len, i; |
1219 | int pages; | 1219 | int pages; |
1220 | struct xdr_buf *arg; | 1220 | struct xdr_buf *arg; |
1221 | DECLARE_WAITQUEUE(wait, current); | 1221 | DECLARE_WAITQUEUE(wait, current); |
1222 | 1222 | ||
1223 | dprintk("svc: server %p waiting for data (to = %ld)\n", | 1223 | dprintk("svc: server %p waiting for data (to = %ld)\n", |
1224 | rqstp, timeout); | 1224 | rqstp, timeout); |
1225 | 1225 | ||
1226 | if (rqstp->rq_sock) | 1226 | if (rqstp->rq_sock) |
1227 | printk(KERN_ERR | 1227 | printk(KERN_ERR |
1228 | "svc_recv: service %p, socket not NULL!\n", | 1228 | "svc_recv: service %p, socket not NULL!\n", |
1229 | rqstp); | 1229 | rqstp); |
1230 | if (waitqueue_active(&rqstp->rq_wait)) | 1230 | if (waitqueue_active(&rqstp->rq_wait)) |
1231 | printk(KERN_ERR | 1231 | printk(KERN_ERR |
1232 | "svc_recv: service %p, wait queue active!\n", | 1232 | "svc_recv: service %p, wait queue active!\n", |
1233 | rqstp); | 1233 | rqstp); |
1234 | 1234 | ||
1235 | 1235 | ||
1236 | /* now allocate needed pages. If we get a failure, sleep briefly */ | 1236 | /* now allocate needed pages. If we get a failure, sleep briefly */ |
1237 | pages = 2 + (serv->sv_bufsz + PAGE_SIZE -1) / PAGE_SIZE; | 1237 | pages = 2 + (serv->sv_bufsz + PAGE_SIZE -1) / PAGE_SIZE; |
1238 | for (i=0; i < pages ; i++) | 1238 | for (i=0; i < pages ; i++) |
1239 | while (rqstp->rq_pages[i] == NULL) { | 1239 | while (rqstp->rq_pages[i] == NULL) { |
1240 | struct page *p = alloc_page(GFP_KERNEL); | 1240 | struct page *p = alloc_page(GFP_KERNEL); |
1241 | if (!p) | 1241 | if (!p) |
1242 | schedule_timeout_uninterruptible(msecs_to_jiffies(500)); | 1242 | schedule_timeout_uninterruptible(msecs_to_jiffies(500)); |
1243 | rqstp->rq_pages[i] = p; | 1243 | rqstp->rq_pages[i] = p; |
1244 | } | 1244 | } |
1245 | 1245 | ||
1246 | /* Make arg->head point to first page and arg->pages point to rest */ | 1246 | /* Make arg->head point to first page and arg->pages point to rest */ |
1247 | arg = &rqstp->rq_arg; | 1247 | arg = &rqstp->rq_arg; |
1248 | arg->head[0].iov_base = page_address(rqstp->rq_pages[0]); | 1248 | arg->head[0].iov_base = page_address(rqstp->rq_pages[0]); |
1249 | arg->head[0].iov_len = PAGE_SIZE; | 1249 | arg->head[0].iov_len = PAGE_SIZE; |
1250 | arg->pages = rqstp->rq_pages + 1; | 1250 | arg->pages = rqstp->rq_pages + 1; |
1251 | arg->page_base = 0; | 1251 | arg->page_base = 0; |
1252 | /* save at least one page for response */ | 1252 | /* save at least one page for response */ |
1253 | arg->page_len = (pages-2)*PAGE_SIZE; | 1253 | arg->page_len = (pages-2)*PAGE_SIZE; |
1254 | arg->len = (pages-1)*PAGE_SIZE; | 1254 | arg->len = (pages-1)*PAGE_SIZE; |
1255 | arg->tail[0].iov_len = 0; | 1255 | arg->tail[0].iov_len = 0; |
1256 | 1256 | ||
1257 | try_to_freeze(); | 1257 | try_to_freeze(); |
1258 | cond_resched(); | 1258 | cond_resched(); |
1259 | if (signalled()) | 1259 | if (signalled()) |
1260 | return -EINTR; | 1260 | return -EINTR; |
1261 | 1261 | ||
1262 | spin_lock_bh(&pool->sp_lock); | 1262 | spin_lock_bh(&pool->sp_lock); |
1263 | if ((svsk = svc_sock_dequeue(pool)) != NULL) { | 1263 | if ((svsk = svc_sock_dequeue(pool)) != NULL) { |
1264 | rqstp->rq_sock = svsk; | 1264 | rqstp->rq_sock = svsk; |
1265 | atomic_inc(&svsk->sk_inuse); | 1265 | atomic_inc(&svsk->sk_inuse); |
1266 | rqstp->rq_reserved = serv->sv_bufsz; | 1266 | rqstp->rq_reserved = serv->sv_bufsz; |
1267 | atomic_add(rqstp->rq_reserved, &svsk->sk_reserved); | 1267 | atomic_add(rqstp->rq_reserved, &svsk->sk_reserved); |
1268 | } else { | 1268 | } else { |
1269 | /* No data pending. Go to sleep */ | 1269 | /* No data pending. Go to sleep */ |
1270 | svc_thread_enqueue(pool, rqstp); | 1270 | svc_thread_enqueue(pool, rqstp); |
1271 | 1271 | ||
1272 | /* | 1272 | /* |
1273 | * We have to be able to interrupt this wait | 1273 | * We have to be able to interrupt this wait |
1274 | * to bring down the daemons ... | 1274 | * to bring down the daemons ... |
1275 | */ | 1275 | */ |
1276 | set_current_state(TASK_INTERRUPTIBLE); | 1276 | set_current_state(TASK_INTERRUPTIBLE); |
1277 | add_wait_queue(&rqstp->rq_wait, &wait); | 1277 | add_wait_queue(&rqstp->rq_wait, &wait); |
1278 | spin_unlock_bh(&pool->sp_lock); | 1278 | spin_unlock_bh(&pool->sp_lock); |
1279 | 1279 | ||
1280 | schedule_timeout(timeout); | 1280 | schedule_timeout(timeout); |
1281 | 1281 | ||
1282 | try_to_freeze(); | 1282 | try_to_freeze(); |
1283 | 1283 | ||
1284 | spin_lock_bh(&pool->sp_lock); | 1284 | spin_lock_bh(&pool->sp_lock); |
1285 | remove_wait_queue(&rqstp->rq_wait, &wait); | 1285 | remove_wait_queue(&rqstp->rq_wait, &wait); |
1286 | 1286 | ||
1287 | if (!(svsk = rqstp->rq_sock)) { | 1287 | if (!(svsk = rqstp->rq_sock)) { |
1288 | svc_thread_dequeue(pool, rqstp); | 1288 | svc_thread_dequeue(pool, rqstp); |
1289 | spin_unlock_bh(&pool->sp_lock); | 1289 | spin_unlock_bh(&pool->sp_lock); |
1290 | dprintk("svc: server %p, no data yet\n", rqstp); | 1290 | dprintk("svc: server %p, no data yet\n", rqstp); |
1291 | return signalled()? -EINTR : -EAGAIN; | 1291 | return signalled()? -EINTR : -EAGAIN; |
1292 | } | 1292 | } |
1293 | } | 1293 | } |
1294 | spin_unlock_bh(&pool->sp_lock); | 1294 | spin_unlock_bh(&pool->sp_lock); |
1295 | 1295 | ||
1296 | dprintk("svc: server %p, pool %u, socket %p, inuse=%d\n", | 1296 | dprintk("svc: server %p, pool %u, socket %p, inuse=%d\n", |
1297 | rqstp, pool->sp_id, svsk, atomic_read(&svsk->sk_inuse)); | 1297 | rqstp, pool->sp_id, svsk, atomic_read(&svsk->sk_inuse)); |
1298 | len = svsk->sk_recvfrom(rqstp); | 1298 | len = svsk->sk_recvfrom(rqstp); |
1299 | dprintk("svc: got len=%d\n", len); | 1299 | dprintk("svc: got len=%d\n", len); |
1300 | 1300 | ||
1301 | /* No data, incomplete (TCP) read, or accept() */ | 1301 | /* No data, incomplete (TCP) read, or accept() */ |
1302 | if (len == 0 || len == -EAGAIN) { | 1302 | if (len == 0 || len == -EAGAIN) { |
1303 | rqstp->rq_res.len = 0; | 1303 | rqstp->rq_res.len = 0; |
1304 | svc_sock_release(rqstp); | 1304 | svc_sock_release(rqstp); |
1305 | return -EAGAIN; | 1305 | return -EAGAIN; |
1306 | } | 1306 | } |
1307 | svsk->sk_lastrecv = get_seconds(); | 1307 | svsk->sk_lastrecv = get_seconds(); |
1308 | clear_bit(SK_OLD, &svsk->sk_flags); | 1308 | clear_bit(SK_OLD, &svsk->sk_flags); |
1309 | 1309 | ||
1310 | rqstp->rq_secure = ntohs(rqstp->rq_addr.sin_port) < 1024; | 1310 | rqstp->rq_secure = ntohs(rqstp->rq_addr.sin_port) < 1024; |
1311 | rqstp->rq_chandle.defer = svc_defer; | 1311 | rqstp->rq_chandle.defer = svc_defer; |
1312 | 1312 | ||
1313 | if (serv->sv_stats) | 1313 | if (serv->sv_stats) |
1314 | serv->sv_stats->netcnt++; | 1314 | serv->sv_stats->netcnt++; |
1315 | return len; | 1315 | return len; |
1316 | } | 1316 | } |
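
The page budget at the top of svc_recv() follows directly from sv_bufsz: enough whole pages to cover one maximum-size argument, plus one page for the head and one held back for the response. The same arithmetic as a standalone helper (PAGE_SZ stands in for the kernel's PAGE_SIZE; the function name is illustrative):

#define PAGE_SZ 4096

/* Pages needed per request: ceil(bufsz / PAGE_SZ) for the argument
 * body, plus one head page and one reserved for the response,
 * matching 2 + (sv_bufsz + PAGE_SIZE - 1) / PAGE_SIZE above. */
static unsigned int pages_per_request(unsigned int bufsz)
{
	return 2 + (bufsz + PAGE_SZ - 1) / PAGE_SZ;
}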
1317 | 1317 | ||
1318 | /* | 1318 | /* |
1319 | * Drop request | 1319 | * Drop request |
1320 | */ | 1320 | */ |
1321 | void | 1321 | void |
1322 | svc_drop(struct svc_rqst *rqstp) | 1322 | svc_drop(struct svc_rqst *rqstp) |
1323 | { | 1323 | { |
1324 | dprintk("svc: socket %p dropped request\n", rqstp->rq_sock); | 1324 | dprintk("svc: socket %p dropped request\n", rqstp->rq_sock); |
1325 | svc_sock_release(rqstp); | 1325 | svc_sock_release(rqstp); |
1326 | } | 1326 | } |
1327 | 1327 | ||
1328 | /* | 1328 | /* |
1329 | * Return reply to client. | 1329 | * Return reply to client. |
1330 | */ | 1330 | */ |
1331 | int | 1331 | int |
1332 | svc_send(struct svc_rqst *rqstp) | 1332 | svc_send(struct svc_rqst *rqstp) |
1333 | { | 1333 | { |
1334 | struct svc_sock *svsk; | 1334 | struct svc_sock *svsk; |
1335 | int len; | 1335 | int len; |
1336 | struct xdr_buf *xb; | 1336 | struct xdr_buf *xb; |
1337 | 1337 | ||
1338 | if ((svsk = rqstp->rq_sock) == NULL) { | 1338 | if ((svsk = rqstp->rq_sock) == NULL) { |
1339 | printk(KERN_WARNING "NULL socket pointer in %s:%d\n", | 1339 | printk(KERN_WARNING "NULL socket pointer in %s:%d\n", |
1340 | __FILE__, __LINE__); | 1340 | __FILE__, __LINE__); |
1341 | return -EFAULT; | 1341 | return -EFAULT; |
1342 | } | 1342 | } |
1343 | 1343 | ||
1344 | /* release the receive skb before sending the reply */ | 1344 | /* release the receive skb before sending the reply */ |
1345 | svc_release_skb(rqstp); | 1345 | svc_release_skb(rqstp); |
1346 | 1346 | ||
1347 | /* calculate over-all length */ | 1347 | /* calculate over-all length */ |
1348 | xb = &rqstp->rq_res; | 1348 | xb = &rqstp->rq_res; |
1349 | xb->len = xb->head[0].iov_len + | 1349 | xb->len = xb->head[0].iov_len + |
1350 | xb->page_len + | 1350 | xb->page_len + |
1351 | xb->tail[0].iov_len; | 1351 | xb->tail[0].iov_len; |
1352 | 1352 | ||
1353 | /* Grab svsk->sk_mutex to serialize outgoing data. */ | 1353 | /* Grab svsk->sk_mutex to serialize outgoing data. */ |
1354 | mutex_lock(&svsk->sk_mutex); | 1354 | mutex_lock(&svsk->sk_mutex); |
1355 | if (test_bit(SK_DEAD, &svsk->sk_flags)) | 1355 | if (test_bit(SK_DEAD, &svsk->sk_flags)) |
1356 | len = -ENOTCONN; | 1356 | len = -ENOTCONN; |
1357 | else | 1357 | else |
1358 | len = svsk->sk_sendto(rqstp); | 1358 | len = svsk->sk_sendto(rqstp); |
1359 | mutex_unlock(&svsk->sk_mutex); | 1359 | mutex_unlock(&svsk->sk_mutex); |
1360 | svc_sock_release(rqstp); | 1360 | svc_sock_release(rqstp); |
1361 | 1361 | ||
1362 | if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN) | 1362 | if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN) |
1363 | return 0; | 1363 | return 0; |
1364 | return len; | 1364 | return len; |
1365 | } | 1365 | } |
1366 | 1366 | ||
1367 | /* | 1367 | /* |
1368 | * Timer function to close old temporary sockets, using | 1368 | * Timer function to close old temporary sockets, using |
1369 | * a mark-and-sweep algorithm. | 1369 | * a mark-and-sweep algorithm. |
1370 | */ | 1370 | */ |
1371 | static void | 1371 | static void |
1372 | svc_age_temp_sockets(unsigned long closure) | 1372 | svc_age_temp_sockets(unsigned long closure) |
1373 | { | 1373 | { |
1374 | struct svc_serv *serv = (struct svc_serv *)closure; | 1374 | struct svc_serv *serv = (struct svc_serv *)closure; |
1375 | struct svc_sock *svsk; | 1375 | struct svc_sock *svsk; |
1376 | struct list_head *le, *next; | 1376 | struct list_head *le, *next; |
1377 | LIST_HEAD(to_be_aged); | 1377 | LIST_HEAD(to_be_aged); |
1378 | 1378 | ||
1379 | dprintk("svc_age_temp_sockets\n"); | 1379 | dprintk("svc_age_temp_sockets\n"); |
1380 | 1380 | ||
1381 | if (!spin_trylock_bh(&serv->sv_lock)) { | 1381 | if (!spin_trylock_bh(&serv->sv_lock)) { |
1382 | /* busy, try again 1 sec later */ | 1382 | /* busy, try again 1 sec later */ |
1383 | dprintk("svc_age_temp_sockets: busy\n"); | 1383 | dprintk("svc_age_temp_sockets: busy\n"); |
1384 | mod_timer(&serv->sv_temptimer, jiffies + HZ); | 1384 | mod_timer(&serv->sv_temptimer, jiffies + HZ); |
1385 | return; | 1385 | return; |
1386 | } | 1386 | } |
1387 | 1387 | ||
1388 | list_for_each_safe(le, next, &serv->sv_tempsocks) { | 1388 | list_for_each_safe(le, next, &serv->sv_tempsocks) { |
1389 | svsk = list_entry(le, struct svc_sock, sk_list); | 1389 | svsk = list_entry(le, struct svc_sock, sk_list); |
1390 | 1390 | ||
1391 | if (!test_and_set_bit(SK_OLD, &svsk->sk_flags)) | 1391 | if (!test_and_set_bit(SK_OLD, &svsk->sk_flags)) |
1392 | continue; | 1392 | continue; |
1393 | if (atomic_read(&svsk->sk_inuse) || test_bit(SK_BUSY, &svsk->sk_flags)) | 1393 | if (atomic_read(&svsk->sk_inuse) || test_bit(SK_BUSY, &svsk->sk_flags)) |
1394 | continue; | 1394 | continue; |
1395 | atomic_inc(&svsk->sk_inuse); | 1395 | atomic_inc(&svsk->sk_inuse); |
1396 | list_move(le, &to_be_aged); | 1396 | list_move(le, &to_be_aged); |
1397 | set_bit(SK_CLOSE, &svsk->sk_flags); | 1397 | set_bit(SK_CLOSE, &svsk->sk_flags); |
1398 | set_bit(SK_DETACHED, &svsk->sk_flags); | 1398 | set_bit(SK_DETACHED, &svsk->sk_flags); |
1399 | } | 1399 | } |
1400 | spin_unlock_bh(&serv->sv_lock); | 1400 | spin_unlock_bh(&serv->sv_lock); |
1401 | 1401 | ||
1402 | while (!list_empty(&to_be_aged)) { | 1402 | while (!list_empty(&to_be_aged)) { |
1403 | le = to_be_aged.next; | 1403 | le = to_be_aged.next; |
1404 | /* fiddling the sk_list node is safe 'cos we're SK_DETACHED */ | 1404 | /* fiddling the sk_list node is safe 'cos we're SK_DETACHED */ |
1405 | list_del_init(le); | 1405 | list_del_init(le); |
1406 | svsk = list_entry(le, struct svc_sock, sk_list); | 1406 | svsk = list_entry(le, struct svc_sock, sk_list); |
1407 | 1407 | ||
1408 | dprintk("queuing svsk %p for closing, %lu seconds old\n", | 1408 | dprintk("queuing svsk %p for closing, %lu seconds old\n", |
1409 | svsk, get_seconds() - svsk->sk_lastrecv); | 1409 | svsk, get_seconds() - svsk->sk_lastrecv); |
1410 | 1410 | ||
1411 | /* a thread will dequeue and close it soon */ | 1411 | /* a thread will dequeue and close it soon */ |
1412 | svc_sock_enqueue(svsk); | 1412 | svc_sock_enqueue(svsk); |
1413 | svc_sock_put(svsk); | 1413 | svc_sock_put(svsk); |
1414 | } | 1414 | } |
1415 | 1415 | ||
1416 | mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ); | 1416 | mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ); |
1417 | } | 1417 | } |
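
svc_age_temp_sockets() is a textbook two-pass mark-and-sweep: the first pass over a socket only sets SK_OLD, and the socket is closed on a later pass only if no request cleared the mark in between (svc_recv() clears SK_OLD after every successful receive). A toy sketch of the per-socket decision, with illustrative types rather than the kernel's:

/* One sweep over a connection: mark on first sight, report it as
 * closeable only if the mark survived a full period unused. */
struct conn {
	int old;	/* set by the sweeper, cleared by each request */
	int busy;	/* currently being serviced */
};

static int sweep_one(struct conn *c)
{
	if (!c->old) {
		c->old = 1;	/* first pass: mark only */
		return 0;
	}
	if (c->busy)
		return 0;	/* marked but active: give it another period */
	return 1;		/* marked and idle: caller should close it */
}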
1418 | 1418 | ||
1419 | /* | 1419 | /* |
1420 | * Initialize socket for RPC use and create svc_sock struct | 1420 | * Initialize socket for RPC use and create svc_sock struct |
1421 | * XXX: May want to setsockopt SO_SNDBUF and SO_RCVBUF. | 1421 | * XXX: May want to setsockopt SO_SNDBUF and SO_RCVBUF. |
1422 | */ | 1422 | */ |
1423 | static struct svc_sock * | 1423 | static struct svc_sock * |
1424 | svc_setup_socket(struct svc_serv *serv, struct socket *sock, | 1424 | svc_setup_socket(struct svc_serv *serv, struct socket *sock, |
1425 | int *errp, int pmap_register) | 1425 | int *errp, int pmap_register) |
1426 | { | 1426 | { |
1427 | struct svc_sock *svsk; | 1427 | struct svc_sock *svsk; |
1428 | struct sock *inet; | 1428 | struct sock *inet; |
1429 | 1429 | ||
1430 | dprintk("svc: svc_setup_socket %p\n", sock); | 1430 | dprintk("svc: svc_setup_socket %p\n", sock); |
1431 | if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) { | 1431 | if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) { |
1432 | *errp = -ENOMEM; | 1432 | *errp = -ENOMEM; |
1433 | return NULL; | 1433 | return NULL; |
1434 | } | 1434 | } |
1435 | 1435 | ||
1436 | inet = sock->sk; | 1436 | inet = sock->sk; |
1437 | 1437 | ||
1438 | /* Register socket with portmapper */ | 1438 | /* Register socket with portmapper */ |
1439 | if (*errp >= 0 && pmap_register) | 1439 | if (*errp >= 0 && pmap_register) |
1440 | *errp = svc_register(serv, inet->sk_protocol, | 1440 | *errp = svc_register(serv, inet->sk_protocol, |
1441 | ntohs(inet_sk(inet)->sport)); | 1441 | ntohs(inet_sk(inet)->sport)); |
1442 | 1442 | ||
1443 | if (*errp < 0) { | 1443 | if (*errp < 0) { |
1444 | kfree(svsk); | 1444 | kfree(svsk); |
1445 | return NULL; | 1445 | return NULL; |
1446 | } | 1446 | } |
1447 | 1447 | ||
1448 | set_bit(SK_BUSY, &svsk->sk_flags); | 1448 | set_bit(SK_BUSY, &svsk->sk_flags); |
1449 | inet->sk_user_data = svsk; | 1449 | inet->sk_user_data = svsk; |
1450 | svsk->sk_sock = sock; | 1450 | svsk->sk_sock = sock; |
1451 | svsk->sk_sk = inet; | 1451 | svsk->sk_sk = inet; |
1452 | svsk->sk_ostate = inet->sk_state_change; | 1452 | svsk->sk_ostate = inet->sk_state_change; |
1453 | svsk->sk_odata = inet->sk_data_ready; | 1453 | svsk->sk_odata = inet->sk_data_ready; |
1454 | svsk->sk_owspace = inet->sk_write_space; | 1454 | svsk->sk_owspace = inet->sk_write_space; |
1455 | svsk->sk_server = serv; | 1455 | svsk->sk_server = serv; |
1456 | atomic_set(&svsk->sk_inuse, 0); | 1456 | atomic_set(&svsk->sk_inuse, 0); |
1457 | svsk->sk_lastrecv = get_seconds(); | 1457 | svsk->sk_lastrecv = get_seconds(); |
1458 | spin_lock_init(&svsk->sk_defer_lock); | 1458 | spin_lock_init(&svsk->sk_defer_lock); |
1459 | INIT_LIST_HEAD(&svsk->sk_deferred); | 1459 | INIT_LIST_HEAD(&svsk->sk_deferred); |
1460 | INIT_LIST_HEAD(&svsk->sk_ready); | 1460 | INIT_LIST_HEAD(&svsk->sk_ready); |
1461 | mutex_init(&svsk->sk_mutex); | 1461 | mutex_init(&svsk->sk_mutex); |
1462 | 1462 | ||
1463 | /* Initialize the socket */ | 1463 | /* Initialize the socket */ |
1464 | if (sock->type == SOCK_DGRAM) | 1464 | if (sock->type == SOCK_DGRAM) |
1465 | svc_udp_init(svsk); | 1465 | svc_udp_init(svsk); |
1466 | else | 1466 | else |
1467 | svc_tcp_init(svsk); | 1467 | svc_tcp_init(svsk); |
1468 | 1468 | ||
1469 | spin_lock_bh(&serv->sv_lock); | 1469 | spin_lock_bh(&serv->sv_lock); |
1470 | if (!pmap_register) { | 1470 | if (!pmap_register) { |
1471 | set_bit(SK_TEMP, &svsk->sk_flags); | 1471 | set_bit(SK_TEMP, &svsk->sk_flags); |
1472 | list_add(&svsk->sk_list, &serv->sv_tempsocks); | 1472 | list_add(&svsk->sk_list, &serv->sv_tempsocks); |
1473 | serv->sv_tmpcnt++; | 1473 | serv->sv_tmpcnt++; |
1474 | if (serv->sv_temptimer.function == NULL) { | 1474 | if (serv->sv_temptimer.function == NULL) { |
1475 | /* setup timer to age temp sockets */ | 1475 | /* setup timer to age temp sockets */ |
1476 | setup_timer(&serv->sv_temptimer, svc_age_temp_sockets, | 1476 | setup_timer(&serv->sv_temptimer, svc_age_temp_sockets, |
1477 | (unsigned long)serv); | 1477 | (unsigned long)serv); |
1478 | mod_timer(&serv->sv_temptimer, | 1478 | mod_timer(&serv->sv_temptimer, |
1479 | jiffies + svc_conn_age_period * HZ); | 1479 | jiffies + svc_conn_age_period * HZ); |
1480 | } | 1480 | } |
1481 | } else { | 1481 | } else { |
1482 | clear_bit(SK_TEMP, &svsk->sk_flags); | 1482 | clear_bit(SK_TEMP, &svsk->sk_flags); |
1483 | list_add(&svsk->sk_list, &serv->sv_permsocks); | 1483 | list_add(&svsk->sk_list, &serv->sv_permsocks); |
1484 | } | 1484 | } |
1485 | spin_unlock_bh(&serv->sv_lock); | 1485 | spin_unlock_bh(&serv->sv_lock); |
1486 | 1486 | ||
1487 | dprintk("svc: svc_setup_socket created %p (inet %p)\n", | 1487 | dprintk("svc: svc_setup_socket created %p (inet %p)\n", |
1488 | svsk, svsk->sk_sk); | 1488 | svsk, svsk->sk_sk); |
1489 | 1489 | ||
1490 | clear_bit(SK_BUSY, &svsk->sk_flags); | 1490 | clear_bit(SK_BUSY, &svsk->sk_flags); |
1491 | svc_sock_enqueue(svsk); | 1491 | svc_sock_enqueue(svsk); |
1492 | return svsk; | 1492 | return svsk; |
1493 | } | 1493 | } |
1494 | 1494 | ||
1495 | int svc_addsock(struct svc_serv *serv, | 1495 | int svc_addsock(struct svc_serv *serv, |
1496 | int fd, | 1496 | int fd, |
1497 | char *name_return, | 1497 | char *name_return, |
1498 | int *proto) | 1498 | int *proto) |
1499 | { | 1499 | { |
1500 | int err = 0; | 1500 | int err = 0; |
1501 | struct socket *so = sockfd_lookup(fd, &err); | 1501 | struct socket *so = sockfd_lookup(fd, &err); |
1502 | struct svc_sock *svsk = NULL; | 1502 | struct svc_sock *svsk = NULL; |
1503 | 1503 | ||
1504 | if (!so) | 1504 | if (!so) |
1505 | return err; | 1505 | return err; |
1506 | if (so->sk->sk_family != AF_INET) | 1506 | if (so->sk->sk_family != AF_INET) |
1507 | err = -EAFNOSUPPORT; | 1507 | err = -EAFNOSUPPORT; |
1508 | else if (so->sk->sk_protocol != IPPROTO_TCP && | 1508 | else if (so->sk->sk_protocol != IPPROTO_TCP && |
1509 | so->sk->sk_protocol != IPPROTO_UDP) | 1509 | so->sk->sk_protocol != IPPROTO_UDP) |
1510 | err = -EPROTONOSUPPORT; | 1510 | err = -EPROTONOSUPPORT; |
1511 | else if (so->state > SS_UNCONNECTED) | 1511 | else if (so->state > SS_UNCONNECTED) |
1512 | err = -EISCONN; | 1512 | err = -EISCONN; |
1513 | else { | 1513 | else { |
1514 | svsk = svc_setup_socket(serv, so, &err, 1); | 1514 | svsk = svc_setup_socket(serv, so, &err, 1); |
1515 | if (svsk) | 1515 | if (svsk) |
1516 | err = 0; | 1516 | err = 0; |
1517 | } | 1517 | } |
1518 | if (err) { | 1518 | if (err) { |
1519 | sockfd_put(so); | 1519 | sockfd_put(so); |
1520 | return err; | 1520 | return err; |
1521 | } | 1521 | } |
1522 | if (proto) *proto = so->sk->sk_protocol; | 1522 | if (proto) *proto = so->sk->sk_protocol; |
1523 | return one_sock_name(name_return, svsk); | 1523 | return one_sock_name(name_return, svsk); |
1524 | } | 1524 | } |
1525 | EXPORT_SYMBOL_GPL(svc_addsock); | 1525 | EXPORT_SYMBOL_GPL(svc_addsock); |
1526 | 1526 | ||
1527 | /* | 1527 | /* |
1528 | * Create socket for RPC service. | 1528 | * Create socket for RPC service. |
1529 | */ | 1529 | */ |
1530 | static int | 1530 | static int |
1531 | svc_create_socket(struct svc_serv *serv, int protocol, struct sockaddr_in *sin) | 1531 | svc_create_socket(struct svc_serv *serv, int protocol, struct sockaddr_in *sin) |
1532 | { | 1532 | { |
1533 | struct svc_sock *svsk; | 1533 | struct svc_sock *svsk; |
1534 | struct socket *sock; | 1534 | struct socket *sock; |
1535 | int error; | 1535 | int error; |
1536 | int type; | 1536 | int type; |
1537 | 1537 | ||
1538 | dprintk("svc: svc_create_socket(%s, %d, %u.%u.%u.%u:%d)\n", | 1538 | dprintk("svc: svc_create_socket(%s, %d, %u.%u.%u.%u:%d)\n", |
1539 | serv->sv_program->pg_name, protocol, | 1539 | serv->sv_program->pg_name, protocol, |
1540 | NIPQUAD(sin->sin_addr.s_addr), | 1540 | NIPQUAD(sin->sin_addr.s_addr), |
1541 | ntohs(sin->sin_port)); | 1541 | ntohs(sin->sin_port)); |
1542 | 1542 | ||
1543 | if (protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) { | 1543 | if (protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) { |
1544 | printk(KERN_WARNING "svc: only UDP and TCP " | 1544 | printk(KERN_WARNING "svc: only UDP and TCP " |
1545 | "sockets supported\n"); | 1545 | "sockets supported\n"); |
1546 | return -EINVAL; | 1546 | return -EINVAL; |
1547 | } | 1547 | } |
1548 | type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM; | 1548 | type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM; |
1549 | 1549 | ||
1550 | if ((error = sock_create_kern(PF_INET, type, protocol, &sock)) < 0) | 1550 | if ((error = sock_create_kern(PF_INET, type, protocol, &sock)) < 0) |
1551 | return error; | 1551 | return error; |
1552 | 1552 | ||
1553 | if (type == SOCK_STREAM) | 1553 | if (type == SOCK_STREAM) |
1554 | sock->sk->sk_reuse = 1; /* allow address reuse */ | 1554 | sock->sk->sk_reuse = 1; /* allow address reuse */ |
1555 | error = kernel_bind(sock, (struct sockaddr *) sin, | 1555 | error = kernel_bind(sock, (struct sockaddr *) sin, |
1556 | sizeof(*sin)); | 1556 | sizeof(*sin)); |
1557 | if (error < 0) | 1557 | if (error < 0) |
1558 | goto bummer; | 1558 | goto bummer; |
1559 | 1559 | ||
1560 | if (protocol == IPPROTO_TCP) { | 1560 | if (protocol == IPPROTO_TCP) { |
1561 | if ((error = kernel_listen(sock, 64)) < 0) | 1561 | if ((error = kernel_listen(sock, 64)) < 0) |
1562 | goto bummer; | 1562 | goto bummer; |
1563 | } | 1563 | } |
1564 | 1564 | ||
1565 | if ((svsk = svc_setup_socket(serv, sock, &error, 1)) != NULL) | 1565 | if ((svsk = svc_setup_socket(serv, sock, &error, 1)) != NULL) |
1566 | return 0; | 1566 | return 0; |
1567 | 1567 | ||
1568 | bummer: | 1568 | bummer: |
1569 | dprintk("svc: svc_create_socket error = %d\n", -error); | 1569 | dprintk("svc: svc_create_socket error = %d\n", -error); |
1570 | sock_release(sock); | 1570 | sock_release(sock); |
1571 | return error; | 1571 | return error; |
1572 | } | 1572 | } |
1573 | 1573 | ||
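svc_create_socket() above follows the standard in-kernel listener bootstrap: create, bind, and (for TCP) listen, unwinding with sock_release() on failure. The same skeleton in isolation, using the helpers of this kernel era (sock_create_kern() did not yet take a network-namespace argument):

    /* Hedged sketch: the listener bootstrap used by svc_create_socket(),
     * reduced to its TCP skeleton. */
    #include <linux/net.h>
    #include <linux/in.h>

    static int make_kernel_listener(struct sockaddr_in *sin, struct socket **res)
    {
            struct socket *sock;
            int err;

            err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
            if (err < 0)
                    return err;
            sock->sk->sk_reuse = 1;         /* allow quick rebinding on restart */
            err = kernel_bind(sock, (struct sockaddr *)sin, sizeof(*sin));
            if (err < 0)
                    goto out_release;
            err = kernel_listen(sock, 64);  /* same backlog as above */
            if (err < 0)
                    goto out_release;
            *res = sock;
            return 0;
    out_release:
            sock_release(sock);
            return err;
    }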
1574 | /* | 1574 | /* |
1575 | * Remove a dead socket | 1575 | * Remove a dead socket |
1576 | */ | 1576 | */ |
1577 | void | 1577 | void |
1578 | svc_delete_socket(struct svc_sock *svsk) | 1578 | svc_delete_socket(struct svc_sock *svsk) |
1579 | { | 1579 | { |
1580 | struct svc_serv *serv; | 1580 | struct svc_serv *serv; |
1581 | struct sock *sk; | 1581 | struct sock *sk; |
1582 | 1582 | ||
1583 | dprintk("svc: svc_delete_socket(%p)\n", svsk); | 1583 | dprintk("svc: svc_delete_socket(%p)\n", svsk); |
1584 | 1584 | ||
1585 | serv = svsk->sk_server; | 1585 | serv = svsk->sk_server; |
1586 | sk = svsk->sk_sk; | 1586 | sk = svsk->sk_sk; |
1587 | 1587 | ||
1588 | sk->sk_state_change = svsk->sk_ostate; | 1588 | sk->sk_state_change = svsk->sk_ostate; |
1589 | sk->sk_data_ready = svsk->sk_odata; | 1589 | sk->sk_data_ready = svsk->sk_odata; |
1590 | sk->sk_write_space = svsk->sk_owspace; | 1590 | sk->sk_write_space = svsk->sk_owspace; |
1591 | 1591 | ||
1592 | spin_lock_bh(&serv->sv_lock); | 1592 | spin_lock_bh(&serv->sv_lock); |
1593 | 1593 | ||
1594 | if (!test_and_set_bit(SK_DETACHED, &svsk->sk_flags)) | 1594 | if (!test_and_set_bit(SK_DETACHED, &svsk->sk_flags)) |
1595 | list_del_init(&svsk->sk_list); | 1595 | list_del_init(&svsk->sk_list); |
1596 | /* | 1596 | /* |
1597 | * We used to delete the svc_sock from whichever list | 1597 | * We used to delete the svc_sock from whichever list |
1598 | * its sk_ready node was on, but we don't actually | 1598 | * its sk_ready node was on, but we don't actually |
1599 | * need to. This is because the only time we're called | 1599 | * need to. This is because the only time we're called |
1600 | * while still attached to a queue, the queue itself | 1600 | * while still attached to a queue, the queue itself |
1601 | * is about to be destroyed (in svc_destroy). | 1601 | * is about to be destroyed (in svc_destroy). |
1602 | */ | 1602 | */ |
1603 | if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags)) | 1603 | if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags)) |
1604 | if (test_bit(SK_TEMP, &svsk->sk_flags)) | 1604 | if (test_bit(SK_TEMP, &svsk->sk_flags)) |
1605 | serv->sv_tmpcnt--; | 1605 | serv->sv_tmpcnt--; |
1606 | 1606 | ||
1607 | if (!atomic_read(&svsk->sk_inuse)) { | 1607 | if (!atomic_read(&svsk->sk_inuse)) { |
1608 | spin_unlock_bh(&serv->sv_lock); | 1608 | spin_unlock_bh(&serv->sv_lock); |
1609 | if (svsk->sk_sock->file) | 1609 | if (svsk->sk_sock->file) |
1610 | sockfd_put(svsk->sk_sock); | 1610 | sockfd_put(svsk->sk_sock); |
1611 | else | 1611 | else |
1612 | sock_release(svsk->sk_sock); | 1612 | sock_release(svsk->sk_sock); |
1613 | if (svsk->sk_info_authunix != NULL) | ||
1614 | svcauth_unix_info_release(svsk->sk_info_authunix); | ||
1613 | kfree(svsk); | 1615 | kfree(svsk); |
1614 | } else { | 1616 | } else { |
1615 | spin_unlock_bh(&serv->sv_lock); | 1617 | spin_unlock_bh(&serv->sv_lock); |
1616 | dprintk(KERN_NOTICE "svc: server socket destroy delayed\n"); | 1618 | dprintk(KERN_NOTICE "svc: server socket destroy delayed\n"); |
1617 | /* svsk->sk_server = NULL; */ | 1619 | /* svsk->sk_server = NULL; */ |
1618 | } | 1620 | } |
1619 | } | 1621 | } |
1620 | 1622 | ||
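The two lines added to the teardown path above are this file's half of the patch: a connected svc_sock may now pin a cached ip_map, and that reference must be dropped before the svc_sock is freed, or the cache entry would leak. The release helper is defined in net/sunrpc/svcauth_unix.c; it is likely little more than a cache_put() of the stashed entry, along these lines (a sketch, not the verbatim source):

    /* Hedged sketch of the helper called above; the real body lives in
     * net/sunrpc/svcauth_unix.c, where struct ip_map and ip_map_cache
     * are defined, and may differ in detail. */
    void svcauth_unix_info_release(void *info)
    {
            struct ip_map *ipm = info;

            cache_put(&ipm->h, &ip_map_cache);      /* drop the per-socket reference */
    }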
1621 | /* | 1623 | /* |
1622 | * Make a socket for nfsd and lockd | 1624 | * Make a socket for nfsd and lockd |
1623 | */ | 1625 | */ |
1624 | int | 1626 | int |
1625 | svc_makesock(struct svc_serv *serv, int protocol, unsigned short port) | 1627 | svc_makesock(struct svc_serv *serv, int protocol, unsigned short port) |
1626 | { | 1628 | { |
1627 | struct sockaddr_in sin; | 1629 | struct sockaddr_in sin; |
1628 | 1630 | ||
1629 | dprintk("svc: creating socket proto = %d\n", protocol); | 1631 | dprintk("svc: creating socket proto = %d\n", protocol); |
1630 | sin.sin_family = AF_INET; | 1632 | sin.sin_family = AF_INET; |
1631 | sin.sin_addr.s_addr = INADDR_ANY; | 1633 | sin.sin_addr.s_addr = INADDR_ANY; |
1632 | sin.sin_port = htons(port); | 1634 | sin.sin_port = htons(port); |
1633 | return svc_create_socket(serv, protocol, &sin); | 1635 | return svc_create_socket(serv, protocol, &sin); |
1634 | } | 1636 | } |
1635 | 1637 | ||
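A typical caller such as lockd asks for one UDP and one TCP socket on its port at startup; a sketch of that usage (hedged: real callers also cope with ports already in use):

    /* Hedged sketch: how a service brings up its listeners via svc_makesock(). */
    #include <linux/sunrpc/svcsock.h>

    static int start_listeners(struct svc_serv *serv, unsigned short port)
    {
            int err;

            err = svc_makesock(serv, IPPROTO_UDP, port);
            if (err < 0)
                    return err;
            return svc_makesock(serv, IPPROTO_TCP, port);
    }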
1636 | /* | 1638 | /* |
1637 | * Handle defer and revisit of requests | 1639 | * Handle defer and revisit of requests |
1638 | */ | 1640 | */ |
1639 | 1641 | ||
1640 | static void svc_revisit(struct cache_deferred_req *dreq, int too_many) | 1642 | static void svc_revisit(struct cache_deferred_req *dreq, int too_many) |
1641 | { | 1643 | { |
1642 | struct svc_deferred_req *dr = container_of(dreq, struct svc_deferred_req, handle); | 1644 | struct svc_deferred_req *dr = container_of(dreq, struct svc_deferred_req, handle); |
1643 | struct svc_sock *svsk; | 1645 | struct svc_sock *svsk; |
1644 | 1646 | ||
1645 | if (too_many) { | 1647 | if (too_many) { |
1646 | svc_sock_put(dr->svsk); | 1648 | svc_sock_put(dr->svsk); |
1647 | kfree(dr); | 1649 | kfree(dr); |
1648 | return; | 1650 | return; |
1649 | } | 1651 | } |
1650 | dprintk("revisit queued\n"); | 1652 | dprintk("revisit queued\n"); |
1651 | svsk = dr->svsk; | 1653 | svsk = dr->svsk; |
1652 | dr->svsk = NULL; | 1654 | dr->svsk = NULL; |
1653 | spin_lock_bh(&svsk->sk_defer_lock); | 1655 | spin_lock_bh(&svsk->sk_defer_lock); |
1654 | list_add(&dr->handle.recent, &svsk->sk_deferred); | 1656 | list_add(&dr->handle.recent, &svsk->sk_deferred); |
1655 | spin_unlock_bh(&svsk->sk_defer_lock); | 1657 | spin_unlock_bh(&svsk->sk_defer_lock); |
1656 | set_bit(SK_DEFERRED, &svsk->sk_flags); | 1658 | set_bit(SK_DEFERRED, &svsk->sk_flags); |
1657 | svc_sock_enqueue(svsk); | 1659 | svc_sock_enqueue(svsk); |
1658 | svc_sock_put(svsk); | 1660 | svc_sock_put(svsk); |
1659 | } | 1661 | } |
1660 | 1662 | ||
1661 | static struct cache_deferred_req * | 1663 | static struct cache_deferred_req * |
1662 | svc_defer(struct cache_req *req) | 1664 | svc_defer(struct cache_req *req) |
1663 | { | 1665 | { |
1664 | struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle); | 1666 | struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle); |
1665 | int size = sizeof(struct svc_deferred_req) + (rqstp->rq_arg.len); | 1667 | int size = sizeof(struct svc_deferred_req) + (rqstp->rq_arg.len); |
1666 | struct svc_deferred_req *dr; | 1668 | struct svc_deferred_req *dr; |
1667 | 1669 | ||
1668 | if (rqstp->rq_arg.page_len) | 1670 | if (rqstp->rq_arg.page_len) |
1669 | return NULL; /* if more than a page, give up FIXME */ | 1671 | return NULL; /* if more than a page, give up FIXME */ |
1670 | if (rqstp->rq_deferred) { | 1672 | if (rqstp->rq_deferred) { |
1671 | dr = rqstp->rq_deferred; | 1673 | dr = rqstp->rq_deferred; |
1672 | rqstp->rq_deferred = NULL; | 1674 | rqstp->rq_deferred = NULL; |
1673 | } else { | 1675 | } else { |
1674 | int skip = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len; | 1676 | int skip = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len; |
1675 | /* FIXME maybe discard if size too large */ | 1677 | /* FIXME maybe discard if size too large */ |
1676 | dr = kmalloc(size, GFP_KERNEL); | 1678 | dr = kmalloc(size, GFP_KERNEL); |
1677 | if (dr == NULL) | 1679 | if (dr == NULL) |
1678 | return NULL; | 1680 | return NULL; |
1679 | 1681 | ||
1680 | dr->handle.owner = rqstp->rq_server; | 1682 | dr->handle.owner = rqstp->rq_server; |
1681 | dr->prot = rqstp->rq_prot; | 1683 | dr->prot = rqstp->rq_prot; |
1682 | dr->addr = rqstp->rq_addr; | 1684 | dr->addr = rqstp->rq_addr; |
1683 | dr->daddr = rqstp->rq_daddr; | 1685 | dr->daddr = rqstp->rq_daddr; |
1684 | dr->argslen = rqstp->rq_arg.len >> 2; | 1686 | dr->argslen = rqstp->rq_arg.len >> 2; |
1685 | memcpy(dr->args, rqstp->rq_arg.head[0].iov_base-skip, dr->argslen<<2); | 1687 | memcpy(dr->args, rqstp->rq_arg.head[0].iov_base-skip, dr->argslen<<2); |
1686 | } | 1688 | } |
1687 | atomic_inc(&rqstp->rq_sock->sk_inuse); | 1689 | atomic_inc(&rqstp->rq_sock->sk_inuse); |
1688 | dr->svsk = rqstp->rq_sock; | 1690 | dr->svsk = rqstp->rq_sock; |
1689 | 1691 | ||
1690 | dr->handle.revisit = svc_revisit; | 1692 | dr->handle.revisit = svc_revisit; |
1691 | return &dr->handle; | 1693 | return &dr->handle; |
1692 | } | 1694 | } |
1693 | 1695 | ||
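svc_defer() exposes only the embedded cache_deferred_req handle to the cache layer, and svc_revisit() recovers the enclosing svc_deferred_req from it with container_of(). The round-trip depends on nothing but the member's offset, as this self-contained C sketch shows (with a local copy of the macro; the struct names here are hypothetical):

    /* Hedged sketch: the container_of() round-trip behind svc_defer()
     * and svc_revisit(). All type names are hypothetical. */
    #include <stddef.h>

    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    struct handle { int cookie; };

    struct deferred_req {
            int prot;
            struct handle handle;   /* embedded, like svc_deferred_req's */
    };

    static struct deferred_req *from_handle(struct handle *h)
    {
            return container_of(h, struct deferred_req, handle);
    }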
1694 | /* | 1696 | /* |
1695 | * recv data from a deferred request into an active one | 1697 | * recv data from a deferred request into an active one |
1696 | */ | 1698 | */ |
1697 | static int svc_deferred_recv(struct svc_rqst *rqstp) | 1699 | static int svc_deferred_recv(struct svc_rqst *rqstp) |
1698 | { | 1700 | { |
1699 | struct svc_deferred_req *dr = rqstp->rq_deferred; | 1701 | struct svc_deferred_req *dr = rqstp->rq_deferred; |
1700 | 1702 | ||
1701 | rqstp->rq_arg.head[0].iov_base = dr->args; | 1703 | rqstp->rq_arg.head[0].iov_base = dr->args; |
1702 | rqstp->rq_arg.head[0].iov_len = dr->argslen<<2; | 1704 | rqstp->rq_arg.head[0].iov_len = dr->argslen<<2; |
1703 | rqstp->rq_arg.page_len = 0; | 1705 | rqstp->rq_arg.page_len = 0; |
1704 | rqstp->rq_arg.len = dr->argslen<<2; | 1706 | rqstp->rq_arg.len = dr->argslen<<2; |
1705 | rqstp->rq_prot = dr->prot; | 1707 | rqstp->rq_prot = dr->prot; |
1706 | rqstp->rq_addr = dr->addr; | 1708 | rqstp->rq_addr = dr->addr; |
1707 | rqstp->rq_daddr = dr->daddr; | 1709 | rqstp->rq_daddr = dr->daddr; |
1708 | rqstp->rq_respages = rqstp->rq_pages; | 1710 | rqstp->rq_respages = rqstp->rq_pages; |
1709 | return dr->argslen<<2; | 1711 | return dr->argslen<<2; |
1710 | } | 1712 | } |
1711 | 1713 | ||
1712 | 1714 | ||
1713 | static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk) | 1715 | static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk) |
1714 | { | 1716 | { |
1715 | struct svc_deferred_req *dr = NULL; | 1717 | struct svc_deferred_req *dr = NULL; |
1716 | 1718 | ||
1717 | if (!test_bit(SK_DEFERRED, &svsk->sk_flags)) | 1719 | if (!test_bit(SK_DEFERRED, &svsk->sk_flags)) |
1718 | return NULL; | 1720 | return NULL; |
1719 | spin_lock_bh(&svsk->sk_defer_lock); | 1721 | spin_lock_bh(&svsk->sk_defer_lock); |
1720 | clear_bit(SK_DEFERRED, &svsk->sk_flags); | 1722 | clear_bit(SK_DEFERRED, &svsk->sk_flags); |
1721 | if (!list_empty(&svsk->sk_deferred)) { | 1723 | if (!list_empty(&svsk->sk_deferred)) { |
1722 | dr = list_entry(svsk->sk_deferred.next, | 1724 | dr = list_entry(svsk->sk_deferred.next, |
1723 | struct svc_deferred_req, | 1725 | struct svc_deferred_req, |
1724 | handle.recent); | 1726 | handle.recent); |
1725 | list_del_init(&dr->handle.recent); | 1727 | list_del_init(&dr->handle.recent); |
1726 | set_bit(SK_DEFERRED, &svsk->sk_flags); | 1728 | set_bit(SK_DEFERRED, &svsk->sk_flags); |
1727 | } | 1729 | } |
1728 | spin_unlock_bh(&svsk->sk_defer_lock); | 1730 | spin_unlock_bh(&svsk->sk_defer_lock); |
1729 | return dr; | 1731 | return dr; |
1730 | } | 1732 | } |
1731 | 1733 |
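svc_deferred_dequeue() clears SK_DEFERRED before inspecting the list and re-sets it if it takes an entry, so a producer setting the flag under the same lock can never strand a queued request: the flag is only a lock-free hint, while the list plus sk_defer_lock stay authoritative. The discipline as a generic sketch (all names hypothetical):

    /* Hedged sketch: the "test, clear, re-arm under the lock" hint
     * pattern used by svc_deferred_dequeue() above. */
    #include <linux/list.h>
    #include <linux/spinlock.h>
    #include <linux/bitops.h>

    struct hinted_queue {
            unsigned long flags;            /* bit 0: list may be non-empty */
            spinlock_t lock;
            struct list_head items;
    };

    static struct list_head *dequeue_hinted(struct hinted_queue *q)
    {
            struct list_head *item = NULL;

            if (!test_bit(0, &q->flags))    /* cheap lock-free fast path */
                    return NULL;
            spin_lock_bh(&q->lock);
            clear_bit(0, &q->flags);
            if (!list_empty(&q->items)) {
                    item = q->items.next;
                    list_del_init(item);
                    set_bit(0, &q->flags);  /* re-arm; the next pass re-checks */
            }
            spin_unlock_bh(&q->lock);
            return item;
    }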