Commit d8733c2956968a01394a4d2a9e97a8b431a78776

Authored by Andrew Morton
Committed by Linus Torvalds
1 parent b8e31edc10

[PATCH] ext3_readdir: use generic readahead

Linus points out that ext3_readdir's readahead only kicks in when
ext3_readdir() is operating at the very start of the directory.  So for large
directories we end up performing no readahead at all, and read performance
suffers.

So take it all out and use the core VM's page_cache_readahead().  This means
that ext3 directory reads will use all of readahead's dynamic window-sizing
logic.

Note that we're using the directory's filp->f_ra to hold the readahead state,
but readahead is actually being performed against the underlying blockdev's
address_space.  Fortunately the readahead code is all set up to handle this.

Tested with printk instrumentation; it works.  I struggled to find a real
workload which actually cared about the difference.

(The patch also exports page_cache_readahead() to GPL modules, and makes
ext3_get_block_handle() non-static so that ext3_readdir() can call it.)

Cc: "Stephen C. Tweedie" <sct@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Showing 4 changed files with 32 additions and 32 deletions (side-by-side diff).

fs/ext3/dir.c
... ... @@ -95,11 +95,10 @@
95 95 void * dirent, filldir_t filldir)
96 96 {
97 97 int error = 0;
98   - unsigned long offset, blk;
99   - int i, num, stored;
100   - struct buffer_head * bh, * tmp, * bha[16];
101   - struct ext3_dir_entry_2 * de;
102   - struct super_block * sb;
  98 + unsigned long offset;
  99 + int i, stored;
  100 + struct ext3_dir_entry_2 *de;
  101 + struct super_block *sb;
103 102 int err;
104 103 struct inode *inode = filp->f_dentry->d_inode;
105 104 int ret = 0;
106 105  
107 106  
... ... @@ -124,38 +123,35 @@
124 123 }
125 124 #endif
126 125 stored = 0;
127   - bh = NULL;
128 126 offset = filp->f_pos & (sb->s_blocksize - 1);
129 127  
130 128 while (!error && !stored && filp->f_pos < inode->i_size) {
131   - blk = (filp->f_pos) >> EXT3_BLOCK_SIZE_BITS(sb);
132   - bh = ext3_bread(NULL, inode, blk, 0, &err);
  129 + unsigned long blk = filp->f_pos >> EXT3_BLOCK_SIZE_BITS(sb);
  130 + struct buffer_head map_bh;
  131 + struct buffer_head *bh = NULL;
  132 +
  133 + map_bh.b_state = 0;
  134 + err = ext3_get_block_handle(NULL, inode, blk, &map_bh, 0, 0);
  135 + if (!err) {
  136 + page_cache_readahead(sb->s_bdev->bd_inode->i_mapping,
  137 + &filp->f_ra,
  138 + filp,
  139 + map_bh.b_blocknr >>
  140 + (PAGE_CACHE_SHIFT - inode->i_blkbits),
  141 + 1);
  142 + bh = ext3_bread(NULL, inode, blk, 0, &err);
  143 + }
  144 +
  145 + /*
  146 + * We ignore I/O errors on directories so users have a chance
  147 + * of recovering data when there's a bad sector
  148 + */
133 149 if (!bh) {
134 150 ext3_error (sb, "ext3_readdir",
135 151 "directory #%lu contains a hole at offset %lu",
136 152 inode->i_ino, (unsigned long)filp->f_pos);
137 153 filp->f_pos += sb->s_blocksize - offset;
138 154 continue;
139   - }
140   -
141   - /*
142   - * Do the readahead
143   - */
144   - if (!offset) {
145   - for (i = 16 >> (EXT3_BLOCK_SIZE_BITS(sb) - 9), num = 0;
146   - i > 0; i--) {
147   - tmp = ext3_getblk (NULL, inode, ++blk, 0, &err);
148   - if (tmp && !buffer_uptodate(tmp) &&
149   - !buffer_locked(tmp))
150   - bha[num++] = tmp;
151   - else
152   - brelse (tmp);
153   - }
154   - if (num) {
155   - ll_rw_block (READA, num, bha);
156   - for (i = 0; i < num; i++)
157   - brelse (bha[i]);
158   - }
159 155 }
160 156  
161 157 revalidate:
fs/ext3/inode.c
... ... @@ -671,7 +671,7 @@
671 671 * The BKL may not be held on entry here. Be sure to take it early.
672 672 */
673 673  
674   -static int
  674 +int
675 675 ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock,
676 676 struct buffer_head *bh_result, int create, int extend_disksize)
677 677 {
include/linux/ext3_fs.h
... ... @@ -772,9 +772,12 @@
772 772  
773 773  
774 774 /* inode.c */
775   -extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int);
776   -extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
777   -extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
  775 +int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int);
  776 +struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
  777 +struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
  778 +int ext3_get_block_handle(handle_t *handle, struct inode *inode,
  779 + sector_t iblock, struct buffer_head *bh_result, int create,
  780 + int extend_disksize);
778 781  
779 782 extern void ext3_read_inode (struct inode *);
780 783 extern int ext3_write_inode (struct inode *, int);
mm/readahead.c
... ... @@ -555,6 +555,7 @@
555 555 out:
556 556 return ra->prev_page + 1;
557 557 }
  558 +EXPORT_SYMBOL_GPL(page_cache_readahead);
558 559  
559 560 /*
560 561 * handle_ra_miss() is called when it is known that a page which should have