Commit 7b2b1fee30df7e2165525cd03f7d1d01a3a56794

Authored by Greg Banks
Committed by Linus Torvalds
1 parent fce1456a19

[PATCH] knfsd: cache ipmap per TCP socket

Speed up high call-rate workloads by caching the struct ip_map for the peer on
the connected struct svc_sock instead of looking it up in the ip_map cache
hashtable on every call.  This helps workloads using AUTH_SYS authentication
over TCP.

Testing was on a 4 CPU 4 NIC Altix using 4 IRIX clients, each with 16
synthetic client threads simulating an rsync (i.e.  recursive directory
listing) workload reading from an i386 RH9 install image (161480 regular files
in 10841 directories) on the server.  That tree is small enough to fit in the
server's RAM so no disk traffic was involved.  This setup gives a sustained
call rate in excess of 60000 calls/sec before being CPU-bound on the server.

Profiling showed strcmp(), called from ip_map_match(), was taking 4.8% of each
CPU, and ip_map_lookup() was taking 2.9%.  This patch drops both contributions
into the profile noise.

Note that the above result overstates the value of this patch for most
workloads.  The synthetic clients are all using separate IP addresses, so
there are 64 entries in the ip_map cache hash.  Because the kernel measured
contained the bug fixed in commit

commit 1f1e030bf75774b6a283518e1534d598e14147d4

and was running on a 64-bit little-endian machine, probably all of those 64
entries were on a single chain, thus increasing the cost of ip_map_lookup().

With a modern kernel you would need more clients to see the same amount of
performance improvement.  This patch has helped to scale knfsd to handle a
deployment with 2000 NFS clients.

Signed-off-by: Greg Banks <gnb@melbourne.sgi.com>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Showing 5 changed files with 61 additions and 3 deletions Side-by-side Diff

include/linux/sunrpc/cache.h
... ... @@ -163,6 +163,17 @@
163 163 kref_put(&h->ref, cd->cache_put);
164 164 }
165 165  
  166 +static inline int cache_valid(struct cache_head *h)
  167 +{
  168 + /* If an item has been unhashed pending removal when
  169 + * the refcount drops to 0, the expiry_time will be
  170 + * set to 0. We don't want to consider such items
  171 + * valid in this context even though CACHE_VALID is
  172 + * set.
  173 + */
  174 + return (h->expiry_time != 0 && test_bit(CACHE_VALID, &h->flags));
  175 +}
  176 +
166 177 extern int cache_check(struct cache_detail *detail,
167 178 struct cache_head *h, struct cache_req *rqstp);
168 179 extern void cache_flush(void);
include/linux/sunrpc/svcauth.h
... ... @@ -126,6 +126,7 @@
126 126 extern struct auth_domain *auth_unix_lookup(struct in_addr addr);
127 127 extern int auth_unix_forget_old(struct auth_domain *dom);
128 128 extern void svcauth_unix_purge(void);
  129 +extern void svcauth_unix_info_release(void *);
129 130  
130 131 static inline unsigned long hash_str(char *name, int bits)
131 132 {
include/linux/sunrpc/svcsock.h
... ... @@ -54,6 +54,9 @@
54 54 int sk_reclen; /* length of record */
55 55 int sk_tcplen; /* current read length */
56 56 time_t sk_lastrecv; /* time of last received request */
  57 +
  58 + /* cache of various info for TCP sockets */
  59 + void *sk_info_authunix;
57 60 };
58 61  
59 62 /*
net/sunrpc/svcauth_unix.c
... ... @@ -9,6 +9,7 @@
9 9 #include <linux/seq_file.h>
10 10 #include <linux/hash.h>
11 11 #include <linux/string.h>
  12 +#include <net/sock.h>
12 13  
13 14 #define RPCDBG_FACILITY RPCDBG_AUTH
14 15  
... ... @@ -375,6 +376,44 @@
375 376 cache_purge(&ip_map_cache);
376 377 }
377 378  
  379 +static inline struct ip_map *
  380 +ip_map_cached_get(struct svc_rqst *rqstp)
  381 +{
  382 + struct ip_map *ipm = rqstp->rq_sock->sk_info_authunix;
  383 + if (ipm != NULL) {
  384 + if (!cache_valid(&ipm->h)) {
  385 + /*
  386 + * The entry has been invalidated since it was
  387 + * remembered, e.g. by a second mount from the
  388 + * same IP address.
  389 + */
  390 + rqstp->rq_sock->sk_info_authunix = NULL;
  391 + cache_put(&ipm->h, &ip_map_cache);
  392 + return NULL;
  393 + }
  394 + cache_get(&ipm->h);
  395 + }
  396 + return ipm;
  397 +}
  398 +
  399 +static inline void
  400 +ip_map_cached_put(struct svc_rqst *rqstp, struct ip_map *ipm)
  401 +{
  402 + struct svc_sock *svsk = rqstp->rq_sock;
  403 +
  404 + if (svsk->sk_sock->type == SOCK_STREAM && svsk->sk_info_authunix == NULL)
  405 + svsk->sk_info_authunix = ipm; /* newly cached, keep the reference */
  406 + else
  407 + cache_put(&ipm->h, &ip_map_cache);
  408 +}
  409 +
  410 +void
  411 +svcauth_unix_info_release(void *info)
  412 +{
  413 + struct ip_map *ipm = info;
  414 + cache_put(&ipm->h, &ip_map_cache);
  415 +}
  416 +
378 417 static int
379 418 svcauth_unix_set_client(struct svc_rqst *rqstp)
380 419 {
... ... @@ -384,8 +423,10 @@
384 423 if (rqstp->rq_proc == 0)
385 424 return SVC_OK;
386 425  
387   - ipm = ip_map_lookup(rqstp->rq_server->sv_program->pg_class,
388   - rqstp->rq_addr.sin_addr);
  426 + ipm = ip_map_cached_get(rqstp);
  427 + if (ipm == NULL)
  428 + ipm = ip_map_lookup(rqstp->rq_server->sv_program->pg_class,
  429 + rqstp->rq_addr.sin_addr);
389 430  
390 431 if (ipm == NULL)
391 432 return SVC_DENIED;
... ... @@ -400,7 +441,7 @@
400 441 case 0:
401 442 rqstp->rq_client = &ipm->m_client->h;
402 443 kref_get(&rqstp->rq_client->ref);
403   - cache_put(&ipm->h, &ip_map_cache);
  444 + ip_map_cached_put(rqstp, ipm);
404 445 break;
405 446 }
406 447 return SVC_OK;
net/sunrpc/svcsock.c
... ... @@ -1610,6 +1610,8 @@
1610 1610 sockfd_put(svsk->sk_sock);
1611 1611 else
1612 1612 sock_release(svsk->sk_sock);
  1613 + if (svsk->sk_info_authunix != NULL)
  1614 + svcauth_unix_info_release(svsk->sk_info_authunix);
1613 1615 kfree(svsk);
1614 1616 } else {
1615 1617 spin_unlock_bh(&serv->sv_lock);