Commit 2ac232f37fa0e8551856a575fe299c47b65b4d66

Authored by Linus Torvalds

Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs-2.6

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs-2.6:
  jbd: change the field "b_cow_tid" of struct journal_head from type unsigned to tid_t
  ext3.txt: update the links in the section "useful links" to the latest ones
  ext3: Fix data corruption in inodes with journalled data
  ext2: check xattr name_len before acquiring xattr_sem in ext2_xattr_get
  ext3: Fix compilation with -DDX_DEBUG
  quota: Remove unused declaration
  jbd: Use WRITE_SYNC in journal checkpoint.
  jbd: Fix oops in journal_remove_journal_head()
  ext3: Return -EINVAL when start is beyond the end of fs in ext3_trim_fs()
  ext3/ioctl.c: silence sparse warnings about different address spaces
  ext3/ext4 Documentation: remove bh/nobh since it has been deprecated
  ext3: Improve truncate error handling
  ext3: use proper little-endian bitops
  ext2: include fs.h into ext2_fs.h
  ext3: Fix oops in ext3_try_to_allocate_with_rsv()
  jbd: fix a bug of leaking jh->b_jcount
  jbd: remove dependency on __GFP_NOFAIL
  ext3: Convert ext3 to new truncate calling convention
  jbd: Add fixed tracepoints
  ext3: Add fixed tracepoints

Resolve conflicts in fs/ext3/fsync.c due to fsync locking push-down and
new fixed tracepoints.

Showing 23 changed files

Documentation/filesystems/ext3.txt
... ... @@ -147,15 +147,6 @@
147 147 package for more details
148 148 (http://sourceforge.net/projects/linuxquota).
149 149  
150   -bh (*) ext3 associates buffer heads to data pages to
151   -nobh (a) cache disk block mapping information
152   - (b) link pages into transaction to provide
153   - ordering guarantees.
154   - "bh" option forces use of buffer heads.
155   - "nobh" option tries to avoid associating buffer
156   - heads (supported only for "writeback" mode).
157   -
158   -
159 150 Specification
160 151 =============
161 152 Ext3 shares all disk implementation with the ext2 filesystem, and adds
... ... @@ -227,6 +218,6 @@
227 218 programs: http://e2fsprogs.sourceforge.net/
228 219 http://ext2resize.sourceforge.net
229 220  
230   -useful links: http://www.ibm.com/developerworks/library/l-fs7.html
231   - http://www.ibm.com/developerworks/library/l-fs8.html
  221 +useful links: http://www.ibm.com/developerworks/library/l-fs7/index.html
  222 + http://www.ibm.com/developerworks/library/l-fs8/index.html
Documentation/filesystems/ext4.txt
... ... @@ -68,12 +68,12 @@
68 68 '-o barriers=[0|1]' mount option for both ext3 and ext4 filesystems
69 69 for a fair comparison. When tuning ext3 for best benchmark numbers,
70 70 it is often worthwhile to try changing the data journaling mode; '-o
71   - data=writeback,nobh' can be faster for some workloads. (Note
72   - however that running mounted with data=writeback can potentially
73   - leave stale data exposed in recently written files in case of an
74   - unclean shutdown, which could be a security exposure in some
75   - situations.) Configuring the filesystem with a large journal can
76   - also be helpful for metadata-intensive workloads.
  71 + data=writeback' can be faster for some workloads. (Note however that
  72 + running mounted with data=writeback can potentially leave stale data
  73 + exposed in recently written files in case of an unclean shutdown,
  74 + which could be a security exposure in some situations.) Configuring
  75 + the filesystem with a large journal can also be helpful for
  76 + metadata-intensive workloads.
77 77  
78 78 2. Features
79 79 ===========
... ... @@ -272,14 +272,6 @@
272 272 package for more details
273 273 (http://sourceforge.net/projects/linuxquota).
274 274  
275   -bh (*) ext4 associates buffer heads to data pages to
276   -nobh (a) cache disk block mapping information
277   - (b) link pages into transaction to provide
278   - ordering guarantees.
279   - "bh" option forces use of buffer heads.
280   - "nobh" option tries to avoid associating buffer
281   - heads (supported only for "writeback" mode).
282   -
283 275 stripe=n Number of filesystem blocks that mballoc will try
284 276 to use for allocation size and alignment. For RAID5/6
285 277 systems this should be the number of data
... ... @@ -393,8 +385,7 @@
393 385 write and convert the extent to initialized after IO
394 386 completes. This approach allows ext4 code to avoid
395 387 using inode mutex, which improves scalability on high
396   - speed storages. However this does not work with nobh
397   - option and the mount will fail. Nor does it work with
  388 + speed storages. However this does not work with
398 389 data journaling and dioread_nolock option will be
399 390 ignored with kernel warning. Note that dioread_nolock
400 391 code path is only used for extent-based files.
fs/ext2/xattr.c
... ... @@ -161,6 +161,10 @@
161 161  
162 162 if (name == NULL)
163 163 return -EINVAL;
  164 + name_len = strlen(name);
  165 + if (name_len > 255)
  166 + return -ERANGE;
  167 +
164 168 down_read(&EXT2_I(inode)->xattr_sem);
165 169 error = -ENODATA;
166 170 if (!EXT2_I(inode)->i_file_acl)
167 171  
... ... @@ -181,12 +185,8 @@
181 185 error = -EIO;
182 186 goto cleanup;
183 187 }
184   - /* find named attribute */
185   - name_len = strlen(name);
186 188  
187   - error = -ERANGE;
188   - if (name_len > 255)
189   - goto cleanup;
  189 + /* find named attribute */
190 190 entry = FIRST_ENTRY(bh);
191 191 while (!IS_LAST_ENTRY(entry)) {
192 192 struct ext2_xattr_entry *next =
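The reordering above is a validate-before-lock fix: an attribute name longer than 255 bytes (the on-disk limit of e_name_len) can never match, so it is now rejected with -ERANGE before xattr_sem is taken and before the EA block is read. A generic sketch of the pattern, with hypothetical names (not code from this commit):

	#include <linux/errno.h>
	#include <linux/rwsem.h>
	#include <linux/string.h>

	#define ATTR_NAME_MAX 255	/* mirrors ext2's 255-byte xattr name limit */

	struct attr_store {
		struct rw_semaphore sem;
		/* ... protected entry list ... */
	};

	static int attr_lookup(struct attr_store *store, const char *name)
	{
		/* Cheap, lock-free validation first: an over-long name can
		 * never match, so fail before taking the semaphore or
		 * doing any I/O. */
		if (strlen(name) > ATTR_NAME_MAX)
			return -ERANGE;

		down_read(&store->sem);
		/* ... walk the entries under the lock ... */
		up_read(&store->sem);
		return -ENODATA;	/* not found in this sketch */
	}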
fs/ext3/balloc.c
... ... @@ -21,6 +21,7 @@
21 21 #include <linux/quotaops.h>
22 22 #include <linux/buffer_head.h>
23 23 #include <linux/blkdev.h>
  24 +#include <trace/events/ext3.h>
24 25  
25 26 /*
26 27 * balloc.c contains the blocks allocation and deallocation routines
... ... @@ -161,6 +162,7 @@
161 162 desc = ext3_get_group_desc(sb, block_group, NULL);
162 163 if (!desc)
163 164 return NULL;
  165 + trace_ext3_read_block_bitmap(sb, block_group);
164 166 bitmap_blk = le32_to_cpu(desc->bg_block_bitmap);
165 167 bh = sb_getblk(sb, bitmap_blk);
166 168 if (unlikely(!bh)) {
... ... @@ -351,6 +353,7 @@
351 353 struct rb_node * parent = NULL;
352 354 struct ext3_reserve_window_node *this;
353 355  
  356 + trace_ext3_rsv_window_add(sb, rsv);
354 357 while (*p)
355 358 {
356 359 parent = *p;
357 360  
... ... @@ -476,8 +479,10 @@
476 479 rsv = &block_i->rsv_window_node;
477 480 if (!rsv_is_empty(&rsv->rsv_window)) {
478 481 spin_lock(rsv_lock);
479   - if (!rsv_is_empty(&rsv->rsv_window))
  482 + if (!rsv_is_empty(&rsv->rsv_window)) {
  483 + trace_ext3_discard_reservation(inode, rsv);
480 484 rsv_window_remove(inode->i_sb, rsv);
  485 + }
481 486 spin_unlock(rsv_lock);
482 487 }
483 488 }
484 489  
... ... @@ -683,14 +688,10 @@
683 688 void ext3_free_blocks(handle_t *handle, struct inode *inode,
684 689 ext3_fsblk_t block, unsigned long count)
685 690 {
686   - struct super_block * sb;
  691 + struct super_block *sb = inode->i_sb;
687 692 unsigned long dquot_freed_blocks;
688 693  
689   - sb = inode->i_sb;
690   - if (!sb) {
691   - printk ("ext3_free_blocks: nonexistent device");
692   - return;
693   - }
  694 + trace_ext3_free_blocks(inode, block, count);
694 695 ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
695 696 if (dquot_freed_blocks)
696 697 dquot_free_block(inode, dquot_freed_blocks);
... ... @@ -1136,6 +1137,7 @@
1136 1137 else
1137 1138 start_block = grp_goal + group_first_block;
1138 1139  
  1140 + trace_ext3_alloc_new_reservation(sb, start_block);
1139 1141 size = my_rsv->rsv_goal_size;
1140 1142  
1141 1143 if (!rsv_is_empty(&my_rsv->rsv_window)) {
1142 1144  
... ... @@ -1230,8 +1232,11 @@
1230 1232 * check if the first free block is within the
1231 1233 * free space we just reserved
1232 1234 */
1233   - if (start_block >= my_rsv->rsv_start && start_block <= my_rsv->rsv_end)
  1235 + if (start_block >= my_rsv->rsv_start &&
  1236 + start_block <= my_rsv->rsv_end) {
  1237 + trace_ext3_reserved(sb, start_block, my_rsv);
1234 1238 return 0; /* success */
  1239 + }
1235 1240 /*
1236 1241 * if the first free bit we found is out of the reservable space
1237 1242 * continue search for next reservable space,
... ... @@ -1514,10 +1519,6 @@
1514 1519  
1515 1520 *errp = -ENOSPC;
1516 1521 sb = inode->i_sb;
1517   - if (!sb) {
1518   - printk("ext3_new_block: nonexistent device");
1519   - return 0;
1520   - }
1521 1522  
1522 1523 /*
1523 1524 * Check quota for allocation of this block.
1524 1525  
... ... @@ -1528,8 +1529,10 @@
1528 1529 return 0;
1529 1530 }
1530 1531  
  1532 + trace_ext3_request_blocks(inode, goal, num);
  1533 +
1531 1534 sbi = EXT3_SB(sb);
1532   - es = EXT3_SB(sb)->s_es;
  1535 + es = sbi->s_es;
1533 1536 ext3_debug("goal=%lu.\n", goal);
1534 1537 /*
1535 1538 * Allocate a block from reservation only when
... ... @@ -1742,6 +1745,10 @@
1742 1745 brelse(bitmap_bh);
1743 1746 dquot_free_block(inode, *count-num);
1744 1747 *count = num;
  1748 +
  1749 + trace_ext3_allocate_blocks(inode, goal, num,
  1750 + (unsigned long long)ret_block);
  1751 +
1745 1752 return ret_block;
1746 1753  
1747 1754 io_error:
... ... @@ -1996,6 +2003,7 @@
1996 2003 if ((next - start) < minblocks)
1997 2004 goto free_extent;
1998 2005  
  2006 + trace_ext3_discard_blocks(sb, discard_block, next - start);
1999 2007 /* Send the TRIM command down to the device */
2000 2008 err = sb_issue_discard(sb, discard_block, next - start,
2001 2009 GFP_NOFS, 0);
... ... @@ -2100,7 +2108,7 @@
2100 2108 if (unlikely(minlen > EXT3_BLOCKS_PER_GROUP(sb)))
2101 2109 return -EINVAL;
2102 2110 if (start >= max_blks)
2103   - goto out;
  2111 + return -EINVAL;
2104 2112 if (start + len > max_blks)
2105 2113 len = max_blks - start;
2106 2114  
... ... @@ -2148,8 +2156,6 @@
2148 2156  
2149 2157 if (ret >= 0)
2150 2158 ret = 0;
2151   -
2152   -out:
2153 2159 range->len = trimmed * sb->s_blocksize;
2154 2160  
2155 2161 return ret;
fs/ext3/file.c
... ... @@ -71,7 +71,6 @@
71 71 };
72 72  
73 73 const struct inode_operations ext3_file_inode_operations = {
74   - .truncate = ext3_truncate,
75 74 .setattr = ext3_setattr,
76 75 #ifdef CONFIG_EXT3_FS_XATTR
77 76 .setxattr = generic_setxattr,
fs/ext3/fsync.c
... ... @@ -30,6 +30,7 @@
30 30 #include <linux/jbd.h>
31 31 #include <linux/ext3_fs.h>
32 32 #include <linux/ext3_jbd.h>
  33 +#include <trace/events/ext3.h>
33 34  
34 35 /*
35 36 * akpm: A new design for ext3_sync_file().
36 37  
... ... @@ -51,12 +52,14 @@
51 52 int ret, needs_barrier = 0;
52 53 tid_t commit_tid;
53 54  
  55 + trace_ext3_sync_file_enter(file, datasync);
  56 +
54 57 if (inode->i_sb->s_flags & MS_RDONLY)
55 58 return 0;
56 59  
57 60 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
58 61 if (ret)
59   - return ret;
  62 + goto out;
60 63  
61 64 /*
62 65 * Taking the mutex here just to keep consistent with how fsync was
... ... @@ -83,7 +86,8 @@
83 86 */
84 87 if (ext3_should_journal_data(inode)) {
85 88 mutex_unlock(&inode->i_mutex);
86   - return ext3_force_commit(inode->i_sb);
  89 + ret = ext3_force_commit(inode->i_sb);
  90 + goto out;
87 91 }
88 92  
89 93 if (datasync)
90 94  
... ... @@ -104,7 +108,10 @@
104 108 */
105 109 if (needs_barrier)
106 110 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
  111 +
107 112 mutex_unlock(&inode->i_mutex);
  113 +out:
  114 + trace_ext3_sync_file_exit(inode, ret);
108 115 return ret;
109 116 }
fs/ext3/ialloc.c
... ... @@ -23,6 +23,7 @@
23 23 #include <linux/buffer_head.h>
24 24 #include <linux/random.h>
25 25 #include <linux/bitops.h>
  26 +#include <trace/events/ext3.h>
26 27  
27 28 #include <asm/byteorder.h>
28 29  
... ... @@ -118,6 +119,7 @@
118 119  
119 120 ino = inode->i_ino;
120 121 ext3_debug ("freeing inode %lu\n", ino);
  122 + trace_ext3_free_inode(inode);
121 123  
122 124 is_directory = S_ISDIR(inode->i_mode);
123 125  
... ... @@ -426,6 +428,7 @@
426 428 return ERR_PTR(-EPERM);
427 429  
428 430 sb = dir->i_sb;
  431 + trace_ext3_request_inode(dir, mode);
429 432 inode = new_inode(sb);
430 433 if (!inode)
431 434 return ERR_PTR(-ENOMEM);
... ... @@ -601,6 +604,7 @@
601 604 }
602 605  
603 606 ext3_debug("allocating inode %lu\n", inode->i_ino);
  607 + trace_ext3_allocate_inode(inode, dir, mode);
604 608 goto really_out;
605 609 fail:
606 610 ext3_std_error(sb, err);
fs/ext3/inode.c
... ... @@ -38,10 +38,12 @@
38 38 #include <linux/bio.h>
39 39 #include <linux/fiemap.h>
40 40 #include <linux/namei.h>
  41 +#include <trace/events/ext3.h>
41 42 #include "xattr.h"
42 43 #include "acl.h"
43 44  
44 45 static int ext3_writepage_trans_blocks(struct inode *inode);
  46 +static int ext3_block_truncate_page(struct inode *inode, loff_t from);
45 47  
46 48 /*
47 49 * Test whether an inode is a fast symlink.
... ... @@ -70,6 +72,7 @@
70 72  
71 73 might_sleep();
72 74  
  75 + trace_ext3_forget(inode, is_metadata, blocknr);
73 76 BUFFER_TRACE(bh, "enter");
74 77  
75 78 jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
76 79  
77 80  
78 81  
... ... @@ -194,20 +197,47 @@
194 197 */
195 198 void ext3_evict_inode (struct inode *inode)
196 199 {
  200 + struct ext3_inode_info *ei = EXT3_I(inode);
197 201 struct ext3_block_alloc_info *rsv;
198 202 handle_t *handle;
199 203 int want_delete = 0;
200 204  
  205 + trace_ext3_evict_inode(inode);
201 206 if (!inode->i_nlink && !is_bad_inode(inode)) {
202 207 dquot_initialize(inode);
203 208 want_delete = 1;
204 209 }
205 210  
  211 + /*
  212 + * When journalling data, dirty buffers are tracked only in the journal.
  213 + * So although mm thinks everything is clean and ready for reaping, the
  214 + * inode might still have some pages to write in the running
  215 + * transaction or waiting to be checkpointed. Thus calling
  216 + * journal_invalidatepage() (via truncate_inode_pages()) to discard
  217 + * these buffers can cause data loss. Also, even if we did not discard
  218 + * these buffers, we would have no way to find them after the inode
  219 + * is reaped, and thus a user could see stale data when reading
  220 + * them before the transaction is checkpointed. So be careful and
  221 + * force everything to disk here... We use ei->i_datasync_tid to
  222 + * store the newest transaction containing inode's data.
  223 + *
  224 + * Note that directories do not have this problem because they don't
  225 + * use page cache.
  226 + */
  227 + if (inode->i_nlink && ext3_should_journal_data(inode) &&
  228 + (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) {
  229 + tid_t commit_tid = atomic_read(&ei->i_datasync_tid);
  230 + journal_t *journal = EXT3_SB(inode->i_sb)->s_journal;
  231 +
  232 + log_start_commit(journal, commit_tid);
  233 + log_wait_commit(journal, commit_tid);
  234 + filemap_write_and_wait(&inode->i_data);
  235 + }
206 236 truncate_inode_pages(&inode->i_data, 0);
207 237  
208 238 ext3_discard_reservation(inode);
209   - rsv = EXT3_I(inode)->i_block_alloc_info;
210   - EXT3_I(inode)->i_block_alloc_info = NULL;
  239 + rsv = ei->i_block_alloc_info;
  240 + ei->i_block_alloc_info = NULL;
211 241 if (unlikely(rsv))
212 242 kfree(rsv);
213 243  
214 244  
... ... @@ -231,15 +261,13 @@
231 261 if (inode->i_blocks)
232 262 ext3_truncate(inode);
233 263 /*
234   - * Kill off the orphan record which ext3_truncate created.
235   - * AKPM: I think this can be inside the above `if'.
236   - * Note that ext3_orphan_del() has to be able to cope with the
237   - * deletion of a non-existent orphan - this is because we don't
238   - * know if ext3_truncate() actually created an orphan record.
239   - * (Well, we could do this if we need to, but heck - it works)
  264 + * Kill off the orphan record created when the inode lost the last
  265 + * link. Note that ext3_orphan_del() has to be able to cope with the
  266 + * deletion of a non-existent orphan - ext3_truncate() could
  267 + * have removed the record.
240 268 */
241 269 ext3_orphan_del(handle, inode);
242   - EXT3_I(inode)->i_dtime = get_seconds();
  270 + ei->i_dtime = get_seconds();
243 271  
244 272 /*
245 273 * One subtle ordering requirement: if anything has gone wrong
... ... @@ -842,6 +870,7 @@
842 870 ext3_fsblk_t first_block = 0;
843 871  
844 872  
  873 + trace_ext3_get_blocks_enter(inode, iblock, maxblocks, create);
845 874 J_ASSERT(handle != NULL || create == 0);
846 875 depth = ext3_block_to_path(inode,iblock,offsets,&blocks_to_boundary);
847 876  
... ... @@ -886,6 +915,9 @@
886 915 if (!create || err == -EIO)
887 916 goto cleanup;
888 917  
  918 + /*
  919 + * Block out ext3_truncate while we alter the tree
  920 + */
889 921 mutex_lock(&ei->truncate_mutex);
890 922  
891 923 /*
... ... @@ -934,9 +966,6 @@
934 966 */
935 967 count = ext3_blks_to_allocate(partial, indirect_blks,
936 968 maxblocks, blocks_to_boundary);
937   - /*
938   - * Block out ext3_truncate while we alter the tree
939   - */
940 969 err = ext3_alloc_branch(handle, inode, indirect_blks, &count, goal,
941 970 offsets + (partial - chain), partial);
942 971  
... ... @@ -970,6 +999,9 @@
970 999 }
971 1000 BUFFER_TRACE(bh_result, "returned");
972 1001 out:
  1002 + trace_ext3_get_blocks_exit(inode, iblock,
  1003 + depth ? le32_to_cpu(chain[depth-1].key) : 0,
  1004 + count, err);
973 1005 return err;
974 1006 }
975 1007  
... ... @@ -1202,6 +1234,16 @@
1202 1234 ext3_truncate(inode);
1203 1235 }
1204 1236  
  1237 +/*
  1238 + * Truncate blocks that were not used by direct IO write. We have to zero out
  1239 + * the last file block as well because direct IO might have written to it.
  1240 + */
  1241 +static void ext3_truncate_failed_direct_write(struct inode *inode)
  1242 +{
  1243 + ext3_block_truncate_page(inode, inode->i_size);
  1244 + ext3_truncate(inode);
  1245 +}
  1246 +
1205 1247 static int ext3_write_begin(struct file *file, struct address_space *mapping,
1206 1248 loff_t pos, unsigned len, unsigned flags,
1207 1249 struct page **pagep, void **fsdata)
... ... @@ -1217,6 +1259,8 @@
1217 1259 * we allocate blocks but write fails for some reason */
1218 1260 int needed_blocks = ext3_writepage_trans_blocks(inode) + 1;
1219 1261  
  1262 + trace_ext3_write_begin(inode, pos, len, flags);
  1263 +
1220 1264 index = pos >> PAGE_CACHE_SHIFT;
1221 1265 from = pos & (PAGE_CACHE_SIZE - 1);
1222 1266 to = from + len;
... ... @@ -1332,6 +1376,7 @@
1332 1376 unsigned from, to;
1333 1377 int ret = 0, ret2;
1334 1378  
  1379 + trace_ext3_ordered_write_end(inode, pos, len, copied);
1335 1380 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
1336 1381  
1337 1382 from = pos & (PAGE_CACHE_SIZE - 1);
... ... @@ -1367,6 +1412,7 @@
1367 1412 struct inode *inode = file->f_mapping->host;
1368 1413 int ret;
1369 1414  
  1415 + trace_ext3_writeback_write_end(inode, pos, len, copied);
1370 1416 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
1371 1417 update_file_sizes(inode, pos, copied);
1372 1418 /*
1373 1419  
... ... @@ -1391,10 +1437,12 @@
1391 1437 {
1392 1438 handle_t *handle = ext3_journal_current_handle();
1393 1439 struct inode *inode = mapping->host;
  1440 + struct ext3_inode_info *ei = EXT3_I(inode);
1394 1441 int ret = 0, ret2;
1395 1442 int partial = 0;
1396 1443 unsigned from, to;
1397 1444  
  1445 + trace_ext3_journalled_write_end(inode, pos, len, copied);
1398 1446 from = pos & (PAGE_CACHE_SIZE - 1);
1399 1447 to = from + len;
1400 1448  
... ... @@ -1419,8 +1467,9 @@
1419 1467 if (pos + len > inode->i_size && ext3_can_truncate(inode))
1420 1468 ext3_orphan_add(handle, inode);
1421 1469 ext3_set_inode_state(inode, EXT3_STATE_JDATA);
1422   - if (inode->i_size > EXT3_I(inode)->i_disksize) {
1423   - EXT3_I(inode)->i_disksize = inode->i_size;
  1470 + atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid);
  1471 + if (inode->i_size > ei->i_disksize) {
  1472 + ei->i_disksize = inode->i_size;
1424 1473 ret2 = ext3_mark_inode_dirty(handle, inode);
1425 1474 if (!ret)
1426 1475 ret = ret2;
... ... @@ -1577,6 +1626,7 @@
1577 1626 if (ext3_journal_current_handle())
1578 1627 goto out_fail;
1579 1628  
  1629 + trace_ext3_ordered_writepage(page);
1580 1630 if (!page_has_buffers(page)) {
1581 1631 create_empty_buffers(page, inode->i_sb->s_blocksize,
1582 1632 (1 << BH_Dirty)|(1 << BH_Uptodate));
... ... @@ -1647,6 +1697,7 @@
1647 1697 if (ext3_journal_current_handle())
1648 1698 goto out_fail;
1649 1699  
  1700 + trace_ext3_writeback_writepage(page);
1650 1701 if (page_has_buffers(page)) {
1651 1702 if (!walk_page_buffers(NULL, page_buffers(page), 0,
1652 1703 PAGE_CACHE_SIZE, NULL, buffer_unmapped)) {
... ... @@ -1689,6 +1740,7 @@
1689 1740 if (ext3_journal_current_handle())
1690 1741 goto no_write;
1691 1742  
  1743 + trace_ext3_journalled_writepage(page);
1692 1744 handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
1693 1745 if (IS_ERR(handle)) {
1694 1746 ret = PTR_ERR(handle);
... ... @@ -1715,6 +1767,8 @@
1715 1767 if (ret == 0)
1716 1768 ret = err;
1717 1769 ext3_set_inode_state(inode, EXT3_STATE_JDATA);
  1770 + atomic_set(&EXT3_I(inode)->i_datasync_tid,
  1771 + handle->h_transaction->t_tid);
1718 1772 unlock_page(page);
1719 1773 } else {
1720 1774 /*
... ... @@ -1739,6 +1793,7 @@
1739 1793  
1740 1794 static int ext3_readpage(struct file *file, struct page *page)
1741 1795 {
  1796 + trace_ext3_readpage(page);
1742 1797 return mpage_readpage(page, ext3_get_block);
1743 1798 }
1744 1799  
... ... @@ -1753,6 +1808,8 @@
1753 1808 {
1754 1809 journal_t *journal = EXT3_JOURNAL(page->mapping->host);
1755 1810  
  1811 + trace_ext3_invalidatepage(page, offset);
  1812 +
1756 1813 /*
1757 1814 * If it's a full truncate we just forget about the pending dirtying
1758 1815 */
... ... @@ -1766,6 +1823,7 @@
1766 1823 {
1767 1824 journal_t *journal = EXT3_JOURNAL(page->mapping->host);
1768 1825  
  1826 + trace_ext3_releasepage(page);
1769 1827 WARN_ON(PageChecked(page));
1770 1828 if (!page_has_buffers(page))
1771 1829 return 0;
... ... @@ -1794,6 +1852,8 @@
1794 1852 size_t count = iov_length(iov, nr_segs);
1795 1853 int retries = 0;
1796 1854  
  1855 + trace_ext3_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
  1856 +
1797 1857 if (rw == WRITE) {
1798 1858 loff_t final_size = offset + count;
1799 1859  
... ... @@ -1827,7 +1887,7 @@
1827 1887 loff_t end = offset + iov_length(iov, nr_segs);
1828 1888  
1829 1889 if (end > isize)
1830   - vmtruncate(inode, isize);
  1890 + ext3_truncate_failed_direct_write(inode);
1831 1891 }
1832 1892 if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
1833 1893 goto retry;
... ... @@ -1841,7 +1901,7 @@
1841 1901 /* This is really bad luck. We've written the data
1842 1902 * but cannot extend i_size. Truncate allocated blocks
1843 1903 * and pretend the write failed... */
1844   - ext3_truncate(inode);
  1904 + ext3_truncate_failed_direct_write(inode);
1845 1905 ret = PTR_ERR(handle);
1846 1906 goto out;
1847 1907 }
... ... @@ -1867,6 +1927,8 @@
1867 1927 ret = err;
1868 1928 }
1869 1929 out:
  1930 + trace_ext3_direct_IO_exit(inode, offset,
  1931 + iov_length(iov, nr_segs), rw, ret);
1870 1932 return ret;
1871 1933 }
1872 1934  
1873 1935  
1874 1936  
1875 1937  
1876 1938  
... ... @@ -1949,17 +2011,24 @@
1949 2011 * This required during truncate. We need to physically zero the tail end
1950 2012 * of that block so it doesn't yield old data if the file is later grown.
1951 2013 */
1952   -static int ext3_block_truncate_page(handle_t *handle, struct page *page,
1953   - struct address_space *mapping, loff_t from)
  2014 +static int ext3_block_truncate_page(struct inode *inode, loff_t from)
1954 2015 {
1955 2016 ext3_fsblk_t index = from >> PAGE_CACHE_SHIFT;
1956   - unsigned offset = from & (PAGE_CACHE_SIZE-1);
  2017 + unsigned offset = from & (PAGE_CACHE_SIZE - 1);
1957 2018 unsigned blocksize, iblock, length, pos;
1958   - struct inode *inode = mapping->host;
  2019 + struct page *page;
  2020 + handle_t *handle = NULL;
1959 2021 struct buffer_head *bh;
1960 2022 int err = 0;
1961 2023  
  2024 + /* Truncated on block boundary - nothing to do */
1962 2025 blocksize = inode->i_sb->s_blocksize;
  2026 + if ((from & (blocksize - 1)) == 0)
  2027 + return 0;
  2028 +
  2029 + page = grab_cache_page(inode->i_mapping, index);
  2030 + if (!page)
  2031 + return -ENOMEM;
1963 2032 length = blocksize - (offset & (blocksize - 1));
1964 2033 iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
1965 2034  
1966 2035  
... ... @@ -2004,11 +2073,23 @@
2004 2073 goto unlock;
2005 2074 }
2006 2075  
  2076 + /* data=writeback mode doesn't need a transaction to zero out data */
  2077 + if (!ext3_should_writeback_data(inode)) {
  2078 + /* We journal at most one block */
  2079 + handle = ext3_journal_start(inode, 1);
  2080 + if (IS_ERR(handle)) {
  2081 + clear_highpage(page);
  2082 + flush_dcache_page(page);
  2083 + err = PTR_ERR(handle);
  2084 + goto unlock;
  2085 + }
  2086 + }
  2087 +
2007 2088 if (ext3_should_journal_data(inode)) {
2008 2089 BUFFER_TRACE(bh, "get write access");
2009 2090 err = ext3_journal_get_write_access(handle, bh);
2010 2091 if (err)
2011   - goto unlock;
  2092 + goto stop;
2012 2093 }
2013 2094  
2014 2095 zero_user(page, offset, length);
... ... @@ -2022,6 +2103,9 @@
2022 2103 err = ext3_journal_dirty_data(handle, bh);
2023 2104 mark_buffer_dirty(bh);
2024 2105 }
  2106 +stop:
  2107 + if (handle)
  2108 + ext3_journal_stop(handle);
2025 2109  
2026 2110 unlock:
2027 2111 unlock_page(page);
... ... @@ -2390,8 +2474,6 @@
2390 2474  
2391 2475 int ext3_can_truncate(struct inode *inode)
2392 2476 {
2393   - if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
2394   - return 0;
2395 2477 if (S_ISREG(inode->i_mode))
2396 2478 return 1;
2397 2479 if (S_ISDIR(inode->i_mode))
... ... @@ -2435,7 +2517,6 @@
2435 2517 struct ext3_inode_info *ei = EXT3_I(inode);
2436 2518 __le32 *i_data = ei->i_data;
2437 2519 int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb);
2438   - struct address_space *mapping = inode->i_mapping;
2439 2520 int offsets[4];
2440 2521 Indirect chain[4];
2441 2522 Indirect *partial;
2442 2523  
2443 2524  
2444 2525  
2445 2526  
2446 2527  
... ... @@ -2443,45 +2524,21 @@
2443 2524 int n;
2444 2525 long last_block;
2445 2526 unsigned blocksize = inode->i_sb->s_blocksize;
2446   - struct page *page;
2447 2527  
  2528 + trace_ext3_truncate_enter(inode);
  2529 +
2448 2530 if (!ext3_can_truncate(inode))
2449 2531 goto out_notrans;
2450 2532  
2451 2533 if (inode->i_size == 0 && ext3_should_writeback_data(inode))
2452 2534 ext3_set_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE);
2453 2535  
2454   - /*
2455   - * We have to lock the EOF page here, because lock_page() nests
2456   - * outside journal_start().
2457   - */
2458   - if ((inode->i_size & (blocksize - 1)) == 0) {
2459   - /* Block boundary? Nothing to do */
2460   - page = NULL;
2461   - } else {
2462   - page = grab_cache_page(mapping,
2463   - inode->i_size >> PAGE_CACHE_SHIFT);
2464   - if (!page)
2465   - goto out_notrans;
2466   - }
2467   -
2468 2536 handle = start_transaction(inode);
2469   - if (IS_ERR(handle)) {
2470   - if (page) {
2471   - clear_highpage(page);
2472   - flush_dcache_page(page);
2473   - unlock_page(page);
2474   - page_cache_release(page);
2475   - }
  2537 + if (IS_ERR(handle))
2476 2538 goto out_notrans;
2477   - }
2478 2539  
2479 2540 last_block = (inode->i_size + blocksize-1)
2480 2541 >> EXT3_BLOCK_SIZE_BITS(inode->i_sb);
2481   -
2482   - if (page)
2483   - ext3_block_truncate_page(handle, page, mapping, inode->i_size);
2484   -
2485 2542 n = ext3_block_to_path(inode, last_block, offsets, NULL);
2486 2543 if (n == 0)
2487 2544 goto out_stop; /* error */
... ... @@ -2596,6 +2653,7 @@
2596 2653 ext3_orphan_del(handle, inode);
2597 2654  
2598 2655 ext3_journal_stop(handle);
  2656 + trace_ext3_truncate_exit(inode);
2599 2657 return;
2600 2658 out_notrans:
2601 2659 /*
... ... @@ -2604,6 +2662,7 @@
2604 2662 */
2605 2663 if (inode->i_nlink)
2606 2664 ext3_orphan_del(NULL, inode);
  2665 + trace_ext3_truncate_exit(inode);
2607 2666 }
2608 2667  
2609 2668 static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb,
... ... @@ -2745,6 +2804,7 @@
2745 2804 * has in-inode xattrs, or we don't have this inode in memory.
2746 2805 * Read the block from disk.
2747 2806 */
  2807 + trace_ext3_load_inode(inode);
2748 2808 get_bh(bh);
2749 2809 bh->b_end_io = end_buffer_read_sync;
2750 2810 submit_bh(READ_META, bh);
2751 2811  
2752 2812  
2753 2813  
... ... @@ -3229,18 +3289,36 @@
3229 3289 }
3230 3290  
3231 3291 error = ext3_orphan_add(handle, inode);
  3292 + if (error) {
  3293 + ext3_journal_stop(handle);
  3294 + goto err_out;
  3295 + }
3232 3296 EXT3_I(inode)->i_disksize = attr->ia_size;
3233   - rc = ext3_mark_inode_dirty(handle, inode);
3234   - if (!error)
3235   - error = rc;
  3297 + error = ext3_mark_inode_dirty(handle, inode);
3236 3298 ext3_journal_stop(handle);
  3299 + if (error) {
  3300 + /* Some hard fs error must have happened. Bail out. */
  3301 + ext3_orphan_del(NULL, inode);
  3302 + goto err_out;
  3303 + }
  3304 + rc = ext3_block_truncate_page(inode, attr->ia_size);
  3305 + if (rc) {
  3306 + /* Cleanup orphan list and exit */
  3307 + handle = ext3_journal_start(inode, 3);
  3308 + if (IS_ERR(handle)) {
  3309 + ext3_orphan_del(NULL, inode);
  3310 + goto err_out;
  3311 + }
  3312 + ext3_orphan_del(handle, inode);
  3313 + ext3_journal_stop(handle);
  3314 + goto err_out;
  3315 + }
3237 3316 }
3238 3317  
3239 3318 if ((attr->ia_valid & ATTR_SIZE) &&
3240 3319 attr->ia_size != i_size_read(inode)) {
3241   - rc = vmtruncate(inode, attr->ia_size);
3242   - if (rc)
3243   - goto err_out;
  3320 + truncate_setsize(inode, attr->ia_size);
  3321 + ext3_truncate(inode);
3244 3322 }
3245 3323  
3246 3324 setattr_copy(inode, attr);
... ... @@ -3374,6 +3452,7 @@
3374 3452 int err;
3375 3453  
3376 3454 might_sleep();
  3455 + trace_ext3_mark_inode_dirty(inode, _RET_IP_);
3377 3456 err = ext3_reserve_inode_write(handle, inode, &iloc);
3378 3457 if (!err)
3379 3458 err = ext3_mark_iloc_dirty(handle, inode, &iloc);
fs/ext3/ioctl.c
... ... @@ -285,7 +285,7 @@
285 285 if (!capable(CAP_SYS_ADMIN))
286 286 return -EPERM;
287 287  
288   - if (copy_from_user(&range, (struct fstrim_range *)arg,
  288 + if (copy_from_user(&range, (struct fstrim_range __user *)arg,
289 289 sizeof(range)))
290 290 return -EFAULT;
291 291  
... ... @@ -293,7 +293,7 @@
293 293 if (ret < 0)
294 294 return ret;
295 295  
296   - if (copy_to_user((struct fstrim_range *)arg, &range,
  296 + if (copy_to_user((struct fstrim_range __user *)arg, &range,
297 297 sizeof(range)))
298 298 return -EFAULT;
299 299  
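The casts above change nothing at runtime; they add the __user address-space annotation so sparse (make C=1) can verify that the ioctl argument is only ever accessed through copy_from_user()/copy_to_user(). A minimal sketch of the idiom, with a hypothetical handler name:

	#include <linux/fs.h>
	#include <linux/uaccess.h>

	/* Hypothetical ioctl handler showing the __user annotation idiom. */
	static long example_trim_ioctl(struct file *filp, unsigned long arg)
	{
		struct fstrim_range __user *uptr =
			(struct fstrim_range __user *)arg;
		struct fstrim_range range;

		if (copy_from_user(&range, uptr, sizeof(range)))
			return -EFAULT;
		/* ... trim using range.start, range.len, range.minlen ... */
		if (copy_to_user(uptr, &range, sizeof(range)))
			return -EFAULT;
		return 0;
	}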
fs/ext3/namei.c
... ... @@ -36,6 +36,7 @@
36 36 #include <linux/quotaops.h>
37 37 #include <linux/buffer_head.h>
38 38 #include <linux/bio.h>
  39 +#include <trace/events/ext3.h>
39 40  
40 41 #include "namei.h"
41 42 #include "xattr.h"
... ... @@ -287,7 +288,7 @@
287 288 while (len--) printk("%c", *name++);
288 289 ext3fs_dirhash(de->name, de->name_len, &h);
289 290 printk(":%x.%u ", h.hash,
290   - ((char *) de - base));
  291 + (unsigned) ((char *) de - base));
291 292 }
292 293 space += EXT3_DIR_REC_LEN(de->name_len);
293 294 names++;
... ... @@ -1013,7 +1014,7 @@
1013 1014  
1014 1015 *err = -ENOENT;
1015 1016 errout:
1016   - dxtrace(printk("%s not found\n", name));
  1017 + dxtrace(printk("%s not found\n", entry->name));
1017 1018 dx_release (frames);
1018 1019 return NULL;
1019 1020 }
... ... @@ -2140,6 +2141,7 @@
2140 2141 struct ext3_dir_entry_2 * de;
2141 2142 handle_t *handle;
2142 2143  
  2144 + trace_ext3_unlink_enter(dir, dentry);
2143 2145 /* Initialize quotas before so that eventual writes go
2144 2146 * in separate transaction */
2145 2147 dquot_initialize(dir);
... ... @@ -2185,6 +2187,7 @@
2185 2187 end_unlink:
2186 2188 ext3_journal_stop(handle);
2187 2189 brelse (bh);
  2190 + trace_ext3_unlink_exit(dentry, retval);
2188 2191 return retval;
2189 2192 }
2190 2193  
fs/ext3/super.c
... ... @@ -44,6 +44,9 @@
44 44 #include "acl.h"
45 45 #include "namei.h"
46 46  
  47 +#define CREATE_TRACE_POINTS
  48 +#include <trace/events/ext3.h>
  49 +
47 50 #ifdef CONFIG_EXT3_DEFAULTS_TO_ORDERED
48 51 #define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_ORDERED_DATA
49 52 #else
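CREATE_TRACE_POINTS must be defined in exactly one compilation unit per trace header: there it makes the TRACE_EVENT() macros in <trace/events/ext3.h> expand into the tracepoint definitions themselves, while every other file includes the header plainly and only picks up the trace_ext3_*() stubs, which are no-ops until the tracepoint is enabled. Schematically (a sketch of the convention, not literal tree contents):

	/* fs/ext3/super.c -- the one file that defines the tracepoints: */
	#define CREATE_TRACE_POINTS
	#include <trace/events/ext3.h>

	/* fs/ext3/inode.c, balloc.c, ... -- every other user only declares them: */
	#include <trace/events/ext3.h>

	static void example(struct inode *inode)
	{
		/* Compiles to a patched-out no-op until enabled via ftrace. */
		trace_ext3_free_inode(inode);
	}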
... ... @@ -497,6 +500,14 @@
497 500 return &ei->vfs_inode;
498 501 }
499 502  
  503 +static int ext3_drop_inode(struct inode *inode)
  504 +{
  505 + int drop = generic_drop_inode(inode);
  506 +
  507 + trace_ext3_drop_inode(inode, drop);
  508 + return drop;
  509 +}
  510 +
500 511 static void ext3_i_callback(struct rcu_head *head)
501 512 {
502 513 struct inode *inode = container_of(head, struct inode, i_rcu);
... ... @@ -788,6 +799,7 @@
788 799 .destroy_inode = ext3_destroy_inode,
789 800 .write_inode = ext3_write_inode,
790 801 .dirty_inode = ext3_dirty_inode,
  802 + .drop_inode = ext3_drop_inode,
791 803 .evict_inode = ext3_evict_inode,
792 804 .put_super = ext3_put_super,
793 805 .sync_fs = ext3_sync_fs,
... ... @@ -2509,6 +2521,7 @@
2509 2521 {
2510 2522 tid_t target;
2511 2523  
  2524 + trace_ext3_sync_fs(sb, wait);
2512 2525 if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) {
2513 2526 if (wait)
2514 2527 log_wait_commit(EXT3_SB(sb)->s_journal, target);
fs/ext3/xattr.c
... ... @@ -803,8 +803,16 @@
803 803 /* We need to allocate a new block */
804 804 ext3_fsblk_t goal = ext3_group_first_block_no(sb,
805 805 EXT3_I(inode)->i_block_group);
806   - ext3_fsblk_t block = ext3_new_block(handle, inode,
807   - goal, &error);
  806 + ext3_fsblk_t block;
  807 +
  808 + /*
  809 + * Protect us against concurrent allocations to the
  810 + * same inode from ext3_..._writepage(). Reservation
  811 + * code does not expect racing allocations.
  812 + */
  813 + mutex_lock(&EXT3_I(inode)->truncate_mutex);
  814 + block = ext3_new_block(handle, inode, goal, &error);
  815 + mutex_unlock(&EXT3_I(inode)->truncate_mutex);
808 816 if (error)
809 817 goto cleanup;
810 818 ea_idebug(inode, "creating block %d", block);
fs/jbd/checkpoint.c
... ... @@ -22,6 +22,8 @@
22 22 #include <linux/jbd.h>
23 23 #include <linux/errno.h>
24 24 #include <linux/slab.h>
  25 +#include <linux/blkdev.h>
  26 +#include <trace/events/jbd.h>
25 27  
26 28 /*
27 29 * Unlink a buffer from a transaction checkpoint list.
28 30  
... ... @@ -95,10 +97,14 @@
95 97  
96 98 if (jh->b_jlist == BJ_None && !buffer_locked(bh) &&
97 99 !buffer_dirty(bh) && !buffer_write_io_error(bh)) {
  100 + /*
  101 + * Get our reference so that bh cannot be freed before
  102 + * we unlock it
  103 + */
  104 + get_bh(bh);
98 105 JBUFFER_TRACE(jh, "remove from checkpoint list");
99 106 ret = __journal_remove_checkpoint(jh) + 1;
100 107 jbd_unlock_bh_state(bh);
101   - journal_remove_journal_head(bh);
102 108 BUFFER_TRACE(bh, "release");
103 109 __brelse(bh);
104 110 } else {
105 111  
... ... @@ -220,8 +226,8 @@
220 226 spin_lock(&journal->j_list_lock);
221 227 goto restart;
222 228 }
  229 + get_bh(bh);
223 230 if (buffer_locked(bh)) {
224   - get_bh(bh);
225 231 spin_unlock(&journal->j_list_lock);
226 232 jbd_unlock_bh_state(bh);
227 233 wait_on_buffer(bh);
... ... @@ -240,7 +246,6 @@
240 246 */
241 247 released = __journal_remove_checkpoint(jh);
242 248 jbd_unlock_bh_state(bh);
243   - journal_remove_journal_head(bh);
244 249 __brelse(bh);
245 250 }
246 251  
247 252  
248 253  
... ... @@ -253,9 +258,12 @@
253 258 __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
254 259 {
255 260 int i;
  261 + struct blk_plug plug;
256 262  
  263 + blk_start_plug(&plug);
257 264 for (i = 0; i < *batch_count; i++)
258   - write_dirty_buffer(bhs[i], WRITE);
  265 + write_dirty_buffer(bhs[i], WRITE_SYNC);
  266 + blk_finish_plug(&plug);
259 267  
260 268 for (i = 0; i < *batch_count; i++) {
261 269 struct buffer_head *bh = bhs[i];
262 270  
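Two independent changes meet in __flush_batch() above: WRITE_SYNC marks checkpoint writeback as synchronous so the I/O scheduler stops treating it like background writeback, and the blk_plug keeps submissions queued per-task until blk_finish_plug(), giving the block layer a chance to merge adjacent buffers into larger requests. The plugging idiom in isolation (a sketch using the same kernel APIs):

	#include <linux/blkdev.h>
	#include <linux/buffer_head.h>

	static void flush_buffers(struct buffer_head **bhs, int nr)
	{
		struct blk_plug plug;
		int i;

		blk_start_plug(&plug);		/* hold submissions back... */
		for (i = 0; i < nr; i++)
			write_dirty_buffer(bhs[i], WRITE_SYNC);
		blk_finish_plug(&plug);		/* ...then dispatch as one batch */
	}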
... ... @@ -304,12 +312,12 @@
304 312 ret = 1;
305 313 if (unlikely(buffer_write_io_error(bh)))
306 314 ret = -EIO;
  315 + get_bh(bh);
307 316 J_ASSERT_JH(jh, !buffer_jbddirty(bh));
308 317 BUFFER_TRACE(bh, "remove from checkpoint");
309 318 __journal_remove_checkpoint(jh);
310 319 spin_unlock(&journal->j_list_lock);
311 320 jbd_unlock_bh_state(bh);
312   - journal_remove_journal_head(bh);
313 321 __brelse(bh);
314 322 } else {
315 323 /*
... ... @@ -358,6 +366,7 @@
358 366 * journal straight away.
359 367 */
360 368 result = cleanup_journal_tail(journal);
  369 + trace_jbd_checkpoint(journal, result);
361 370 jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
362 371 if (result <= 0)
363 372 return result;
... ... @@ -503,6 +512,7 @@
503 512 if (blocknr < journal->j_tail)
504 513 freed = freed + journal->j_last - journal->j_first;
505 514  
  515 + trace_jbd_cleanup_journal_tail(journal, first_tid, blocknr, freed);
506 516 jbd_debug(1,
507 517 "Cleaning journal tail from %d to %d (offset %u), "
508 518 "freeing %u\n",
509 519  
... ... @@ -523,9 +533,9 @@
523 533 /*
524 534 * journal_clean_one_cp_list
525 535 *
526   - * Find all the written-back checkpoint buffers in the given list and release them.
  536 + * Find all the written-back checkpoint buffers in the given list and release
  537 + * them.
527 538 *
528   - * Called with the journal locked.
529 539 * Called with j_list_lock held.
530 540 * Returns number of buffers reaped (for debug)
531 541 */
532 542  
... ... @@ -632,8 +642,8 @@
632 642 * checkpoint lists.
633 643 *
634 644 * The function returns 1 if it frees the transaction, 0 otherwise.
  645 + * The function can free jh and bh.
635 646 *
636   - * This function is called with the journal locked.
637 647 * This function is called with j_list_lock held.
638 648 * This function is called with jbd_lock_bh_state(jh2bh(jh))
639 649 */
640 650  
641 651  
... ... @@ -652,13 +662,14 @@
652 662 }
653 663 journal = transaction->t_journal;
654 664  
  665 + JBUFFER_TRACE(jh, "removing from transaction");
655 666 __buffer_unlink(jh);
656 667 jh->b_cp_transaction = NULL;
  668 + journal_put_journal_head(jh);
657 669  
658 670 if (transaction->t_checkpoint_list != NULL ||
659 671 transaction->t_checkpoint_io_list != NULL)
660 672 goto out;
661   - JBUFFER_TRACE(jh, "transaction has no more buffers");
662 673  
663 674 /*
664 675 * There is one special case to worry about: if we have just pulled the
665 676  
... ... @@ -669,10 +680,8 @@
669 680 * The locking here around t_state is a bit sleazy.
670 681 * See the comment at the end of journal_commit_transaction().
671 682 */
672   - if (transaction->t_state != T_FINISHED) {
673   - JBUFFER_TRACE(jh, "belongs to running/committing transaction");
  683 + if (transaction->t_state != T_FINISHED)
674 684 goto out;
675   - }
676 685  
677 686 /* OK, that was the last buffer for the transaction: we can now
678 687 safely remove this transaction from the log */
... ... @@ -684,7 +693,6 @@
684 693 wake_up(&journal->j_wait_logspace);
685 694 ret = 1;
686 695 out:
687   - JBUFFER_TRACE(jh, "exit");
688 696 return ret;
689 697 }
690 698  
... ... @@ -703,6 +711,8 @@
703 711 J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh)));
704 712 J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
705 713  
  714 + /* Get reference for checkpointing transaction */
  715 + journal_grab_journal_head(jh2bh(jh));
706 716 jh->b_cp_transaction = transaction;
707 717  
708 718 if (!transaction->t_checkpoint_list) {
... ... @@ -752,6 +762,7 @@
752 762 J_ASSERT(journal->j_committing_transaction != transaction);
753 763 J_ASSERT(journal->j_running_transaction != transaction);
754 764  
  765 + trace_jbd_drop_transaction(journal, transaction);
755 766 jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
756 767 kfree(transaction);
757 768 }
fs/jbd/commit.c
... ... @@ -21,6 +21,7 @@
21 21 #include <linux/pagemap.h>
22 22 #include <linux/bio.h>
23 23 #include <linux/blkdev.h>
  24 +#include <trace/events/jbd.h>
24 25  
25 26 /*
26 27 * Default IO end handler for temporary BJ_IO buffer_heads.
... ... @@ -204,6 +205,8 @@
204 205 if (!trylock_buffer(bh)) {
205 206 BUFFER_TRACE(bh, "needs blocking lock");
206 207 spin_unlock(&journal->j_list_lock);
  208 + trace_jbd_do_submit_data(journal,
  209 + commit_transaction);
207 210 /* Write out all data to prevent deadlocks */
208 211 journal_do_submit_data(wbuf, bufs, write_op);
209 212 bufs = 0;
... ... @@ -236,6 +239,8 @@
236 239 jbd_unlock_bh_state(bh);
237 240 if (bufs == journal->j_wbufsize) {
238 241 spin_unlock(&journal->j_list_lock);
  242 + trace_jbd_do_submit_data(journal,
  243 + commit_transaction);
239 244 journal_do_submit_data(wbuf, bufs, write_op);
240 245 bufs = 0;
241 246 goto write_out_data;
... ... @@ -253,10 +258,6 @@
253 258 jbd_unlock_bh_state(bh);
254 259 if (locked)
255 260 unlock_buffer(bh);
256   - journal_remove_journal_head(bh);
257   - /* One for our safety reference, other for
258   - * journal_remove_journal_head() */
259   - put_bh(bh);
260 261 release_data_buffer(bh);
261 262 }
262 263  
... ... @@ -266,6 +267,7 @@
266 267 }
267 268 }
268 269 spin_unlock(&journal->j_list_lock);
  270 + trace_jbd_do_submit_data(journal, commit_transaction);
269 271 journal_do_submit_data(wbuf, bufs, write_op);
270 272  
271 273 return err;
272 274  
... ... @@ -316,12 +318,14 @@
316 318 commit_transaction = journal->j_running_transaction;
317 319 J_ASSERT(commit_transaction->t_state == T_RUNNING);
318 320  
  321 + trace_jbd_start_commit(journal, commit_transaction);
319 322 jbd_debug(1, "JBD: starting commit of transaction %d\n",
320 323 commit_transaction->t_tid);
321 324  
322 325 spin_lock(&journal->j_state_lock);
323 326 commit_transaction->t_state = T_LOCKED;
324 327  
  328 + trace_jbd_commit_locking(journal, commit_transaction);
325 329 spin_lock(&commit_transaction->t_handle_lock);
326 330 while (commit_transaction->t_updates) {
327 331 DEFINE_WAIT(wait);
... ... @@ -392,6 +396,7 @@
392 396 */
393 397 journal_switch_revoke_table(journal);
394 398  
  399 + trace_jbd_commit_flushing(journal, commit_transaction);
395 400 commit_transaction->t_state = T_FLUSH;
396 401 journal->j_committing_transaction = commit_transaction;
397 402 journal->j_running_transaction = NULL;
398 403  
... ... @@ -446,14 +451,9 @@
446 451 }
447 452 if (buffer_jbd(bh) && bh2jh(bh) == jh &&
448 453 jh->b_transaction == commit_transaction &&
449   - jh->b_jlist == BJ_Locked) {
  454 + jh->b_jlist == BJ_Locked)
450 455 __journal_unfile_buffer(jh);
451   - jbd_unlock_bh_state(bh);
452   - journal_remove_journal_head(bh);
453   - put_bh(bh);
454   - } else {
455   - jbd_unlock_bh_state(bh);
456   - }
  456 + jbd_unlock_bh_state(bh);
457 457 release_data_buffer(bh);
458 458 cond_resched_lock(&journal->j_list_lock);
459 459 }
... ... @@ -493,6 +493,7 @@
493 493 commit_transaction->t_state = T_COMMIT;
494 494 spin_unlock(&journal->j_state_lock);
495 495  
  496 + trace_jbd_commit_logging(journal, commit_transaction);
496 497 J_ASSERT(commit_transaction->t_nr_buffers <=
497 498 commit_transaction->t_outstanding_credits);
498 499  
499 500  
... ... @@ -797,10 +798,16 @@
797 798 while (commit_transaction->t_forget) {
798 799 transaction_t *cp_transaction;
799 800 struct buffer_head *bh;
  801 + int try_to_free = 0;
800 802  
801 803 jh = commit_transaction->t_forget;
802 804 spin_unlock(&journal->j_list_lock);
803 805 bh = jh2bh(jh);
  806 + /*
  807 + * Get a reference so that bh cannot be freed before we are
  808 + * done with it.
  809 + */
  810 + get_bh(bh);
804 811 jbd_lock_bh_state(bh);
805 812 J_ASSERT_JH(jh, jh->b_transaction == commit_transaction ||
806 813 jh->b_transaction == journal->j_running_transaction);
807 814  
808 815  
809 816  
... ... @@ -858,28 +865,27 @@
858 865 __journal_insert_checkpoint(jh, commit_transaction);
859 866 if (is_journal_aborted(journal))
860 867 clear_buffer_jbddirty(bh);
861   - JBUFFER_TRACE(jh, "refile for checkpoint writeback");
862   - __journal_refile_buffer(jh);
863   - jbd_unlock_bh_state(bh);
864 868 } else {
865 869 J_ASSERT_BH(bh, !buffer_dirty(bh));
866   - /* The buffer on BJ_Forget list and not jbddirty means
  870 + /*
  871 + * The buffer on BJ_Forget list and not jbddirty means
867 872 * it has been freed by this transaction and hence it
868 873 * could not have been reallocated until this
869 874 * transaction has committed. *BUT* it could be
870 875 * reallocated once we have written all the data to
871 876 * disk and before we process the buffer on BJ_Forget
872   - * list. */
873   - JBUFFER_TRACE(jh, "refile or unfile freed buffer");
874   - __journal_refile_buffer(jh);
875   - if (!jh->b_transaction) {
876   - jbd_unlock_bh_state(bh);
877   - /* needs a brelse */
878   - journal_remove_journal_head(bh);
879   - release_buffer_page(bh);
880   - } else
881   - jbd_unlock_bh_state(bh);
  877 + * list.
  878 + */
  879 + if (!jh->b_next_transaction)
  880 + try_to_free = 1;
882 881 }
  882 + JBUFFER_TRACE(jh, "refile or unfile freed buffer");
  883 + __journal_refile_buffer(jh);
  884 + jbd_unlock_bh_state(bh);
  885 + if (try_to_free)
  886 + release_buffer_page(bh);
  887 + else
  888 + __brelse(bh);
883 889 cond_resched_lock(&journal->j_list_lock);
884 890 }
885 891 spin_unlock(&journal->j_list_lock);
... ... @@ -946,6 +952,7 @@
946 952 }
947 953 spin_unlock(&journal->j_list_lock);
948 954  
  955 + trace_jbd_end_commit(journal, commit_transaction);
949 956 jbd_debug(1, "JBD: commit %d complete, head %d\n",
950 957 journal->j_commit_sequence, journal->j_tail_sequence);
951 958  
fs/jbd/journal.c
... ... @@ -38,6 +38,9 @@
38 38 #include <linux/debugfs.h>
39 39 #include <linux/ratelimit.h>
40 40  
  41 +#define CREATE_TRACE_POINTS
  42 +#include <trace/events/jbd.h>
  43 +
41 44 #include <asm/uaccess.h>
42 45 #include <asm/page.h>
43 46  
... ... @@ -1065,6 +1068,7 @@
1065 1068 } else
1066 1069 write_dirty_buffer(bh, WRITE);
1067 1070  
  1071 + trace_jbd_update_superblock_end(journal, wait);
1068 1072 out:
1069 1073 /* If we have just flushed the log (by marking s_start==0), then
1070 1074 * any future commit will have to be careful to update the
... ... @@ -1799,10 +1803,9 @@
1799 1803 * When a buffer has its BH_JBD bit set it is immune from being released by
1800 1804 * core kernel code, mainly via ->b_count.
1801 1805 *
1802   - * A journal_head may be detached from its buffer_head when the journal_head's
1803   - * b_transaction, b_cp_transaction and b_next_transaction pointers are NULL.
1804   - * Various places in JBD call journal_remove_journal_head() to indicate that the
1805   - * journal_head can be dropped if needed.
  1806 + * A journal_head is detached from its buffer_head when the journal_head's
  1807 + * b_jcount reaches zero. Running transaction (b_transaction) and checkpoint
  1808 + * transaction (b_cp_transaction) hold their references to b_jcount.
1806 1809 *
1807 1810 * Various places in the kernel want to attach a journal_head to a buffer_head
1808 1811 * _before_ attaching the journal_head to a transaction. To protect the
1809 1812  
... ... @@ -1815,17 +1818,16 @@
1815 1818 * (Attach a journal_head if needed. Increments b_jcount)
1816 1819 * struct journal_head *jh = journal_add_journal_head(bh);
1817 1820 * ...
1818   - * jh->b_transaction = xxx;
1819   - * journal_put_journal_head(jh);
1820   - *
1821   - * Now, the journal_head's b_jcount is zero, but it is safe from being released
1822   - * because it has a non-zero b_transaction.
  1821 + * (Get another reference for transaction)
  1822 + * journal_grab_journal_head(bh);
  1823 + * jh->b_transaction = xxx;
  1824 + * (Put original reference)
  1825 + * journal_put_journal_head(jh);
1823 1826 */
1824 1827  
1825 1828 /*
1826 1829 * Give a buffer_head a journal_head.
1827 1830 *
1828   - * Doesn't need the journal lock.
1829 1831 * May sleep.
1830 1832 */
1831 1833 struct journal_head *journal_add_journal_head(struct buffer_head *bh)
1832 1834  
1833 1835  
... ... @@ -1889,61 +1891,29 @@
1889 1891 struct journal_head *jh = bh2jh(bh);
1890 1892  
1891 1893 J_ASSERT_JH(jh, jh->b_jcount >= 0);
1892   -
1893   - get_bh(bh);
1894   - if (jh->b_jcount == 0) {
1895   - if (jh->b_transaction == NULL &&
1896   - jh->b_next_transaction == NULL &&
1897   - jh->b_cp_transaction == NULL) {
1898   - J_ASSERT_JH(jh, jh->b_jlist == BJ_None);
1899   - J_ASSERT_BH(bh, buffer_jbd(bh));
1900   - J_ASSERT_BH(bh, jh2bh(jh) == bh);
1901   - BUFFER_TRACE(bh, "remove journal_head");
1902   - if (jh->b_frozen_data) {
1903   - printk(KERN_WARNING "%s: freeing "
1904   - "b_frozen_data\n",
1905   - __func__);
1906   - jbd_free(jh->b_frozen_data, bh->b_size);
1907   - }
1908   - if (jh->b_committed_data) {
1909   - printk(KERN_WARNING "%s: freeing "
1910   - "b_committed_data\n",
1911   - __func__);
1912   - jbd_free(jh->b_committed_data, bh->b_size);
1913   - }
1914   - bh->b_private = NULL;
1915   - jh->b_bh = NULL; /* debug, really */
1916   - clear_buffer_jbd(bh);
1917   - __brelse(bh);
1918   - journal_free_journal_head(jh);
1919   - } else {
1920   - BUFFER_TRACE(bh, "journal_head was locked");
1921   - }
  1894 + J_ASSERT_JH(jh, jh->b_transaction == NULL);
  1895 + J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
  1896 + J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
  1897 + J_ASSERT_JH(jh, jh->b_jlist == BJ_None);
  1898 + J_ASSERT_BH(bh, buffer_jbd(bh));
  1899 + J_ASSERT_BH(bh, jh2bh(jh) == bh);
  1900 + BUFFER_TRACE(bh, "remove journal_head");
  1901 + if (jh->b_frozen_data) {
  1902 + printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__);
  1903 + jbd_free(jh->b_frozen_data, bh->b_size);
1922 1904 }
  1905 + if (jh->b_committed_data) {
  1906 + printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__);
  1907 + jbd_free(jh->b_committed_data, bh->b_size);
  1908 + }
  1909 + bh->b_private = NULL;
  1910 + jh->b_bh = NULL; /* debug, really */
  1911 + clear_buffer_jbd(bh);
  1912 + journal_free_journal_head(jh);
1923 1913 }
1924 1914  
1925 1915 /*
1926   - * journal_remove_journal_head(): if the buffer isn't attached to a transaction
1927   - * and has a zero b_jcount then remove and release its journal_head. If we did
1928   - * see that the buffer is not used by any transaction we also "logically"
1929   - * decrement ->b_count.
1930   - *
1931   - * We in fact take an additional increment on ->b_count as a convenience,
1932   - * because the caller usually wants to do additional things with the bh
1933   - * after calling here.
1934   - * The caller of journal_remove_journal_head() *must* run __brelse(bh) at some
1935   - * time. Once the caller has run __brelse(), the buffer is eligible for
1936   - * reaping by try_to_free_buffers().
1937   - */
1938   -void journal_remove_journal_head(struct buffer_head *bh)
1939   -{
1940   - jbd_lock_bh_journal_head(bh);
1941   - __journal_remove_journal_head(bh);
1942   - jbd_unlock_bh_journal_head(bh);
1943   -}
1944   -
1945   -/*
1946   - * Drop a reference on the passed journal_head. If it fell to zero then try to
  1916 + * Drop a reference on the passed journal_head. If it fell to zero then
1947 1917 * release the journal_head from the buffer_head.
1948 1918 */
1949 1919 void journal_put_journal_head(struct journal_head *jh)
1950 1920  
1951 1921  
... ... @@ -1953,11 +1923,12 @@
1953 1923 jbd_lock_bh_journal_head(bh);
1954 1924 J_ASSERT_JH(jh, jh->b_jcount > 0);
1955 1925 --jh->b_jcount;
1956   - if (!jh->b_jcount && !jh->b_transaction) {
  1926 + if (!jh->b_jcount) {
1957 1927 __journal_remove_journal_head(bh);
  1928 + jbd_unlock_bh_journal_head(bh);
1958 1929 __brelse(bh);
1959   - }
1960   - jbd_unlock_bh_journal_head(bh);
  1930 + } else
  1931 + jbd_unlock_bh_journal_head(bh);
1961 1932 }
1962 1933  
1963 1934 /*
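Taken together, the journal.c changes replace the old "drop the head when no transaction references it" heuristic with plain reference counting: journal_remove_journal_head() is gone, each transaction that files a buffer owns a b_jcount reference taken via journal_grab_journal_head(), and journal_put_journal_head() tears the head down when the count reaches zero. The documented protocol, condensed into a sketch:

	#include <linux/jbd.h>

	/* Sketch of the new lifetime rule (mirrors the comment block above). */
	static void file_on_transaction(struct buffer_head *bh,
					transaction_t *transaction)
	{
		/* Attach a journal_head; this takes one b_jcount reference. */
		struct journal_head *jh = journal_add_journal_head(bh);

		/* The transaction takes its own reference before using jh... */
		journal_grab_journal_head(bh);
		jh->b_transaction = transaction;

		/* ...so the setup reference can be dropped right away. When
		 * the buffer is later unfiled and the last reference is put,
		 * b_jcount hits zero and the journal_head is freed. */
		journal_put_journal_head(jh);
	}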
fs/jbd/transaction.c
... ... @@ -26,6 +26,7 @@
26 26 #include <linux/mm.h>
27 27 #include <linux/highmem.h>
28 28 #include <linux/hrtimer.h>
  29 +#include <linux/backing-dev.h>
29 30  
30 31 static void __journal_temp_unlink_buffer(struct journal_head *jh);
31 32  
32 33  
... ... @@ -99,11 +100,10 @@
99 100  
100 101 alloc_transaction:
101 102 if (!journal->j_running_transaction) {
102   - new_transaction = kzalloc(sizeof(*new_transaction),
103   - GFP_NOFS|__GFP_NOFAIL);
  103 + new_transaction = kzalloc(sizeof(*new_transaction), GFP_NOFS);
104 104 if (!new_transaction) {
105   - ret = -ENOMEM;
106   - goto out;
  105 + congestion_wait(BLK_RW_ASYNC, HZ/50);
  106 + goto alloc_transaction;
107 107 }
108 108 }
109 109  
... ... @@ -696,7 +696,6 @@
696 696 if (!jh->b_transaction) {
697 697 JBUFFER_TRACE(jh, "no transaction");
698 698 J_ASSERT_JH(jh, !jh->b_next_transaction);
699   - jh->b_transaction = transaction;
700 699 JBUFFER_TRACE(jh, "file as BJ_Reserved");
701 700 spin_lock(&journal->j_list_lock);
702 701 __journal_file_buffer(jh, transaction, BJ_Reserved);
... ... @@ -818,7 +817,6 @@
818 817 * committed and so it's safe to clear the dirty bit.
819 818 */
820 819 clear_buffer_dirty(jh2bh(jh));
821   - jh->b_transaction = transaction;
822 820  
823 821 /* first access by this transaction */
824 822 jh->b_modified = 0;
825 823  
... ... @@ -844,8 +842,8 @@
844 842 */
845 843 JBUFFER_TRACE(jh, "cancelling revoke");
846 844 journal_cancel_revoke(handle, jh);
847   - journal_put_journal_head(jh);
848 845 out:
  846 + journal_put_journal_head(jh);
849 847 return err;
850 848 }
851 849  
... ... @@ -1069,8 +1067,9 @@
1069 1067 ret = -EIO;
1070 1068 goto no_journal;
1071 1069 }
1072   -
1073   - if (jh->b_transaction != NULL) {
  1070 + /* We might have slept so buffer could be refiled now */
  1071 + if (jh->b_transaction != NULL &&
  1072 + jh->b_transaction != handle->h_transaction) {
1074 1073 JBUFFER_TRACE(jh, "unfile from commit");
1075 1074 __journal_temp_unlink_buffer(jh);
1076 1075 /* It still points to the committing
... ... @@ -1091,8 +1090,6 @@
1091 1090 if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) {
1092 1091 JBUFFER_TRACE(jh, "not on correct data list: unfile");
1093 1092 J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow);
1094   - __journal_temp_unlink_buffer(jh);
1095   - jh->b_transaction = handle->h_transaction;
1096 1093 JBUFFER_TRACE(jh, "file as data");
1097 1094 __journal_file_buffer(jh, handle->h_transaction,
1098 1095 BJ_SyncData);
... ... @@ -1300,8 +1297,6 @@
1300 1297 __journal_file_buffer(jh, transaction, BJ_Forget);
1301 1298 } else {
1302 1299 __journal_unfile_buffer(jh);
1303   - journal_remove_journal_head(bh);
1304   - __brelse(bh);
1305 1300 if (!buffer_jbd(bh)) {
1306 1301 spin_unlock(&journal->j_list_lock);
1307 1302 jbd_unlock_bh_state(bh);
1308 1303  
1309 1304  
1310 1305  
... ... @@ -1622,19 +1617,32 @@
1622 1617 mark_buffer_dirty(bh); /* Expose it to the VM */
1623 1618 }
1624 1619  
  1620 +/*
  1621 + * Remove buffer from all transactions.
  1622 + *
  1623 + * Called with bh_state lock and j_list_lock
  1624 + *
  1625 + * jh and bh may already be freed when this function returns.
  1626 + */
1625 1627 void __journal_unfile_buffer(struct journal_head *jh)
1626 1628 {
1627 1629 __journal_temp_unlink_buffer(jh);
1628 1630 jh->b_transaction = NULL;
  1631 + journal_put_journal_head(jh);
1629 1632 }
1630 1633  
1631 1634 void journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
1632 1635 {
1633   - jbd_lock_bh_state(jh2bh(jh));
  1636 + struct buffer_head *bh = jh2bh(jh);
  1637 +
  1638 + /* Get reference so that buffer cannot be freed before we unlock it */
  1639 + get_bh(bh);
  1640 + jbd_lock_bh_state(bh);
1634 1641 spin_lock(&journal->j_list_lock);
1635 1642 __journal_unfile_buffer(jh);
1636 1643 spin_unlock(&journal->j_list_lock);
1637   - jbd_unlock_bh_state(jh2bh(jh));
  1644 + jbd_unlock_bh_state(bh);
  1645 + __brelse(bh);
1638 1646 }
1639 1647  
1640 1648 /*
1641 1649  
... ... @@ -1661,16 +1669,12 @@
1661 1669 /* A written-back ordered data buffer */
1662 1670 JBUFFER_TRACE(jh, "release data");
1663 1671 __journal_unfile_buffer(jh);
1664   - journal_remove_journal_head(bh);
1665   - __brelse(bh);
1666 1672 }
1667 1673 } else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) {
1668 1674 /* written-back checkpointed metadata buffer */
1669 1675 if (jh->b_jlist == BJ_None) {
1670 1676 JBUFFER_TRACE(jh, "remove from checkpoint list");
1671 1677 __journal_remove_checkpoint(jh);
1672   - journal_remove_journal_head(bh);
1673   - __brelse(bh);
1674 1678 }
1675 1679 }
1676 1680 spin_unlock(&journal->j_list_lock);
... ... @@ -1733,7 +1737,7 @@
1733 1737 /*
1734 1738 * We take our own ref against the journal_head here to avoid
1735 1739 * having to add tons of locking around each instance of
1736   - * journal_remove_journal_head() and journal_put_journal_head().
  1740 + * journal_put_journal_head().
1737 1741 */
1738 1742 jh = journal_grab_journal_head(bh);
1739 1743 if (!jh)
1740 1744  
... ... @@ -1770,10 +1774,9 @@
1770 1774 int may_free = 1;
1771 1775 struct buffer_head *bh = jh2bh(jh);
1772 1776  
1773   - __journal_unfile_buffer(jh);
1774   -
1775 1777 if (jh->b_cp_transaction) {
1776 1778 JBUFFER_TRACE(jh, "on running+cp transaction");
  1779 + __journal_temp_unlink_buffer(jh);
1777 1780 /*
1778 1781 * We don't want to write the buffer anymore, clear the
1779 1782 * bit so that we don't confuse checks in
... ... @@ -1784,8 +1787,7 @@
1784 1787 may_free = 0;
1785 1788 } else {
1786 1789 JBUFFER_TRACE(jh, "on running transaction");
1787   - journal_remove_journal_head(bh);
1788   - __brelse(bh);
  1790 + __journal_unfile_buffer(jh);
1789 1791 }
1790 1792 return may_free;
1791 1793 }
... ... @@ -2070,6 +2072,8 @@
2070 2072  
2071 2073 if (jh->b_transaction)
2072 2074 __journal_temp_unlink_buffer(jh);
  2075 + else
  2076 + journal_grab_journal_head(bh);
2073 2077 jh->b_transaction = transaction;
2074 2078  
2075 2079 switch (jlist) {
2076 2080  
... ... @@ -2127,9 +2131,10 @@
2127 2131 * already started to be used by a subsequent transaction, refile the
2128 2132 * buffer on that transaction's metadata list.
2129 2133 *
2130   - * Called under journal->j_list_lock
2131   - *
  2134 + * Called under j_list_lock
2132 2135 * Called under jbd_lock_bh_state(jh2bh(jh))
  2136 + *
  2137 + * jh and bh may already be freed when this function returns
2133 2138 */
2134 2139 void __journal_refile_buffer(struct journal_head *jh)
2135 2140 {
... ... @@ -2153,6 +2158,11 @@
2153 2158  
2154 2159 was_dirty = test_clear_buffer_jbddirty(bh);
2155 2160 __journal_temp_unlink_buffer(jh);
  2161 + /*
  2162 + * We set b_transaction here because b_next_transaction will inherit
  2163 + * our jh reference and thus __journal_file_buffer() must not take a
  2164 + * new one.
  2165 + */
2156 2166 jh->b_transaction = jh->b_next_transaction;
2157 2167 jh->b_next_transaction = NULL;
2158 2168 if (buffer_freed(bh))
2159 2169  
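The comment above encodes the new reference rule: sitting on any transaction list holds exactly one jh reference, and a refile transfers that reference rather than dropping and retaking it. A paraphrase of the two cooperating sites, assuming the semantics this patch introduces:

	/* __journal_file_buffer(): only an unfiled buffer takes a new ref */
	if (jh->b_transaction)
		__journal_temp_unlink_buffer(jh);	/* existing ref kept */
	else
		journal_grab_journal_head(bh);		/* list takes its ref */
	jh->b_transaction = transaction;

	/* __journal_refile_buffer(): hand that same ref to the next
	 * transaction by setting b_transaction before refiling */
	jh->b_transaction = jh->b_next_transaction;
	jh->b_next_transaction = NULL;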
... ... @@ -2169,30 +2179,21 @@
2169 2179 }
2170 2180  
2171 2181 /*
2172   - * For the unlocked version of this call, also make sure that any
2173   - * hanging journal_head is cleaned up if necessary.
  2182 + * __journal_refile_buffer() with the necessary locking added. We take our
  2183 + * own bh reference so that the buffer stays pinned while we unlock it.
2174 2184 *
2175   - * __journal_refile_buffer is usually called as part of a single locked
2176   - * operation on a buffer_head, in which the caller is probably going to
2177   - * be hooking the journal_head onto other lists. In that case it is up
2178   - * to the caller to remove the journal_head if necessary. For the
2179   - * unlocked journal_refile_buffer call, the caller isn't going to be
2180   - * doing anything else to the buffer so we need to do the cleanup
2181   - * ourselves to avoid a jh leak.
2182   - *
2183   - * *** The journal_head may be freed by this call! ***
  2185 + * The jh and bh may be freed by this call.
2184 2186 */
2185 2187 void journal_refile_buffer(journal_t *journal, struct journal_head *jh)
2186 2188 {
2187 2189 struct buffer_head *bh = jh2bh(jh);
2188 2190  
  2191 + /* Get reference so that buffer cannot be freed before we unlock it */
  2192 + get_bh(bh);
2189 2193 jbd_lock_bh_state(bh);
2190 2194 spin_lock(&journal->j_list_lock);
2191   -
2192 2195 __journal_refile_buffer(jh);
2193 2196 jbd_unlock_bh_state(bh);
2194   - journal_remove_journal_head(bh);
2195   -
2196 2197 spin_unlock(&journal->j_list_lock);
2197 2198 __brelse(bh);
2198 2199 }
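With the cleanup moved into __journal_unfile_buffer(), journal_refile_buffer() is now just the self-locking wrapper: callers holding no jbd locks use it, while callers already under jbd_lock_bh_state() and j_list_lock call __journal_refile_buffer() directly. A usage sketch (hypothetical caller):

	/* From an unlocked context: hand the jh over; the wrapper takes
	 * and drops its own bh reference around the refile. */
	static void example_refile(journal_t *journal, struct journal_head *jh)
	{
		journal_refile_buffer(journal, jh);
		/* Neither jh nor bh may be used past this point: both
		 * may already have been freed. */
	}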
include/linux/ext2_fs.h
... ... @@ -18,6 +18,7 @@
18 18  
19 19 #include <linux/types.h>
20 20 #include <linux/magic.h>
  21 +#include <linux/fs.h>
21 22  
22 23 /*
23 24 * The second extended filesystem constants/structures
include/linux/ext3_fs.h
... ... @@ -418,12 +418,11 @@
418 418 #define EXT2_MOUNT_DATA_FLAGS EXT3_MOUNT_DATA_FLAGS
419 419 #endif
420 420  
421   -#define ext3_set_bit __test_and_set_bit_le
  421 +#define ext3_set_bit __set_bit_le
422 422 #define ext3_set_bit_atomic ext2_set_bit_atomic
423   -#define ext3_clear_bit __test_and_clear_bit_le
  423 +#define ext3_clear_bit __clear_bit_le
424 424 #define ext3_clear_bit_atomic ext2_clear_bit_atomic
425 425 #define ext3_test_bit test_bit_le
426   -#define ext3_find_first_zero_bit find_first_zero_bit_le
427 426 #define ext3_find_next_zero_bit find_next_zero_bit_le
428 427  
429 428 /*
... ... @@ -913,7 +912,7 @@
913 912 extern int ext3_change_inode_journal_flag(struct inode *, int);
914 913 extern int ext3_get_inode_loc(struct inode *, struct ext3_iloc *);
915 914 extern int ext3_can_truncate(struct inode *inode);
916   -extern void ext3_truncate (struct inode *);
  915 +extern void ext3_truncate(struct inode *inode);
917 916 extern void ext3_set_inode_flags(struct inode *);
918 917 extern void ext3_get_inode_flags(struct ext3_inode_info *);
919 918 extern void ext3_set_aops(struct inode *inode);
... ... @@ -940,7 +940,6 @@
940 940 */
941 941 struct journal_head *journal_add_journal_head(struct buffer_head *bh);
942 942 struct journal_head *journal_grab_journal_head(struct buffer_head *bh);
943   -void journal_remove_journal_head(struct buffer_head *bh);
944 943 void journal_put_journal_head(struct journal_head *jh);
945 944  
946 945 /*
include/linux/journal-head.h
... ... @@ -45,7 +45,7 @@
45 45 * has been cowed
46 46 * [jbd_lock_bh_state()]
47 47 */
48   - unsigned b_cow_tid;
  48 + tid_t b_cow_tid;
49 49  
50 50 /*
51 51 * Copy of the buffer data frozen for writing to the log.
include/linux/quota.h
... ... @@ -415,14 +415,6 @@
415 415 {QFMT_VFS_V0, "quota_v2"},\
416 416 {0, NULL}}
417 417  
418   -#else
419   -
420   -# /* nodep */ include <sys/cdefs.h>
421   -
422   -__BEGIN_DECLS
423   -long quotactl __P ((unsigned int, const char *, int, caddr_t));
424   -__END_DECLS
425   -
426 418 #endif /* __KERNEL__ */
427 419 #endif /* _QUOTA_ */
include/trace/events/ext3.h
  1 +#undef TRACE_SYSTEM
  2 +#define TRACE_SYSTEM ext3
  3 +
  4 +#if !defined(_TRACE_EXT3_H) || defined(TRACE_HEADER_MULTI_READ)
  5 +#define _TRACE_EXT3_H
  6 +
  7 +#include <linux/tracepoint.h>
  8 +
  9 +TRACE_EVENT(ext3_free_inode,
  10 + TP_PROTO(struct inode *inode),
  11 +
  12 + TP_ARGS(inode),
  13 +
  14 + TP_STRUCT__entry(
  15 + __field( dev_t, dev )
  16 + __field( ino_t, ino )
  17 + __field( umode_t, mode )
  18 + __field( uid_t, uid )
  19 + __field( gid_t, gid )
  20 + __field( blkcnt_t, blocks )
  21 + ),
  22 +
  23 + TP_fast_assign(
  24 + __entry->dev = inode->i_sb->s_dev;
  25 + __entry->ino = inode->i_ino;
  26 + __entry->mode = inode->i_mode;
  27 + __entry->uid = inode->i_uid;
  28 + __entry->gid = inode->i_gid;
  29 + __entry->blocks = inode->i_blocks;
  30 + ),
  31 +
  32 + TP_printk("dev %d,%d ino %lu mode 0%o uid %u gid %u blocks %lu",
  33 + MAJOR(__entry->dev), MINOR(__entry->dev),
  34 + (unsigned long) __entry->ino,
  35 + __entry->mode, __entry->uid, __entry->gid,
  36 + (unsigned long) __entry->blocks)
  37 +);
  38 +
  39 +TRACE_EVENT(ext3_request_inode,
  40 + TP_PROTO(struct inode *dir, int mode),
  41 +
  42 + TP_ARGS(dir, mode),
  43 +
  44 + TP_STRUCT__entry(
  45 + __field( dev_t, dev )
  46 + __field( ino_t, dir )
  47 + __field( umode_t, mode )
  48 + ),
  49 +
  50 + TP_fast_assign(
  51 + __entry->dev = dir->i_sb->s_dev;
  52 + __entry->dir = dir->i_ino;
  53 + __entry->mode = mode;
  54 + ),
  55 +
  56 + TP_printk("dev %d,%d dir %lu mode 0%o",
  57 + MAJOR(__entry->dev), MINOR(__entry->dev),
  58 + (unsigned long) __entry->dir, __entry->mode)
  59 +);
  60 +
  61 +TRACE_EVENT(ext3_allocate_inode,
  62 + TP_PROTO(struct inode *inode, struct inode *dir, int mode),
  63 +
  64 + TP_ARGS(inode, dir, mode),
  65 +
  66 + TP_STRUCT__entry(
  67 + __field( dev_t, dev )
  68 + __field( ino_t, ino )
  69 + __field( ino_t, dir )
  70 + __field( umode_t, mode )
  71 + ),
  72 +
  73 + TP_fast_assign(
  74 + __entry->dev = inode->i_sb->s_dev;
  75 + __entry->ino = inode->i_ino;
  76 + __entry->dir = dir->i_ino;
  77 + __entry->mode = mode;
  78 + ),
  79 +
  80 + TP_printk("dev %d,%d ino %lu dir %lu mode 0%o",
  81 + MAJOR(__entry->dev), MINOR(__entry->dev),
  82 + (unsigned long) __entry->ino,
  83 + (unsigned long) __entry->dir, __entry->mode)
  84 +);
  85 +
  86 +TRACE_EVENT(ext3_evict_inode,
  87 + TP_PROTO(struct inode *inode),
  88 +
  89 + TP_ARGS(inode),
  90 +
  91 + TP_STRUCT__entry(
  92 + __field( dev_t, dev )
  93 + __field( ino_t, ino )
  94 + __field( int, nlink )
  95 + ),
  96 +
  97 + TP_fast_assign(
  98 + __entry->dev = inode->i_sb->s_dev;
  99 + __entry->ino = inode->i_ino;
  100 + __entry->nlink = inode->i_nlink;
  101 + ),
  102 +
  103 + TP_printk("dev %d,%d ino %lu nlink %d",
  104 + MAJOR(__entry->dev), MINOR(__entry->dev),
  105 + (unsigned long) __entry->ino, __entry->nlink)
  106 +);
  107 +
  108 +TRACE_EVENT(ext3_drop_inode,
  109 + TP_PROTO(struct inode *inode, int drop),
  110 +
  111 + TP_ARGS(inode, drop),
  112 +
  113 + TP_STRUCT__entry(
  114 + __field( dev_t, dev )
  115 + __field( ino_t, ino )
  116 + __field( int, drop )
  117 + ),
  118 +
  119 + TP_fast_assign(
  120 + __entry->dev = inode->i_sb->s_dev;
  121 + __entry->ino = inode->i_ino;
  122 + __entry->drop = drop;
  123 + ),
  124 +
  125 + TP_printk("dev %d,%d ino %lu drop %d",
  126 + MAJOR(__entry->dev), MINOR(__entry->dev),
  127 + (unsigned long) __entry->ino, __entry->drop)
  128 +);
  129 +
  130 +TRACE_EVENT(ext3_mark_inode_dirty,
  131 + TP_PROTO(struct inode *inode, unsigned long IP),
  132 +
  133 + TP_ARGS(inode, IP),
  134 +
  135 + TP_STRUCT__entry(
  136 + __field( dev_t, dev )
  137 + __field( ino_t, ino )
  138 + __field(unsigned long, ip )
  139 + ),
  140 +
  141 + TP_fast_assign(
  142 + __entry->dev = inode->i_sb->s_dev;
  143 + __entry->ino = inode->i_ino;
  144 + __entry->ip = IP;
  145 + ),
  146 +
  147 + TP_printk("dev %d,%d ino %lu caller %pF",
  148 + MAJOR(__entry->dev), MINOR(__entry->dev),
  149 + (unsigned long) __entry->ino, (void *)__entry->ip)
  150 +);
  151 +
  152 +TRACE_EVENT(ext3_write_begin,
  153 + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
  154 + unsigned int flags),
  155 +
  156 + TP_ARGS(inode, pos, len, flags),
  157 +
  158 + TP_STRUCT__entry(
  159 + __field( dev_t, dev )
  160 + __field( ino_t, ino )
  161 + __field( loff_t, pos )
  162 + __field( unsigned int, len )
  163 + __field( unsigned int, flags )
  164 + ),
  165 +
  166 + TP_fast_assign(
  167 + __entry->dev = inode->i_sb->s_dev;
  168 + __entry->ino = inode->i_ino;
  169 + __entry->pos = pos;
  170 + __entry->len = len;
  171 + __entry->flags = flags;
  172 + ),
  173 +
  174 + TP_printk("dev %d,%d ino %lu pos %llu len %u flags %u",
  175 + MAJOR(__entry->dev), MINOR(__entry->dev),
  176 + (unsigned long) __entry->ino,
  177 + (unsigned long long) __entry->pos, __entry->len,
  178 + __entry->flags)
  179 +);
  180 +
  181 +DECLARE_EVENT_CLASS(ext3__write_end,
  182 + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
  183 + unsigned int copied),
  184 +
  185 + TP_ARGS(inode, pos, len, copied),
  186 +
  187 + TP_STRUCT__entry(
  188 + __field( dev_t, dev )
  189 + __field( ino_t, ino )
  190 + __field( loff_t, pos )
  191 + __field( unsigned int, len )
  192 + __field( unsigned int, copied )
  193 + ),
  194 +
  195 + TP_fast_assign(
  196 + __entry->dev = inode->i_sb->s_dev;
  197 + __entry->ino = inode->i_ino;
  198 + __entry->pos = pos;
  199 + __entry->len = len;
  200 + __entry->copied = copied;
  201 + ),
  202 +
  203 + TP_printk("dev %d,%d ino %lu pos %llu len %u copied %u",
  204 + MAJOR(__entry->dev), MINOR(__entry->dev),
  205 + (unsigned long) __entry->ino,
  206 + (unsigned long long) __entry->pos, __entry->len,
  207 + __entry->copied)
  208 +);
  209 +
  210 +DEFINE_EVENT(ext3__write_end, ext3_ordered_write_end,
  211 +
  212 + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
  213 + unsigned int copied),
  214 +
  215 + TP_ARGS(inode, pos, len, copied)
  216 +);
  217 +
  218 +DEFINE_EVENT(ext3__write_end, ext3_writeback_write_end,
  219 +
  220 + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
  221 + unsigned int copied),
  222 +
  223 + TP_ARGS(inode, pos, len, copied)
  224 +);
  225 +
  226 +DEFINE_EVENT(ext3__write_end, ext3_journalled_write_end,
  227 +
  228 + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
  229 + unsigned int copied),
  230 +
  231 + TP_ARGS(inode, pos, len, copied)
  232 +);
  233 +
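A note on the macros used from here on: DECLARE_EVENT_CLASS() emits the record layout, assignment, and print logic once, and each DEFINE_EVENT() stamps out a named tracepoint sharing that template, which keeps the generated code small. Adding a further event on the same class would look like this (hypothetical event name, not part of this commit):

	DEFINE_EVENT(ext3__write_end, ext3_example_write_end,

		TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
			 unsigned int copied),

		TP_ARGS(inode, pos, len, copied)
	);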
  234 +DECLARE_EVENT_CLASS(ext3__page_op,
  235 + TP_PROTO(struct page *page),
  236 +
  237 + TP_ARGS(page),
  238 +
  239 + TP_STRUCT__entry(
  240 + __field( dev_t, dev )
  241 + __field( ino_t, ino )
  242 + __field( pgoff_t, index )
  243 +
  244 + ),
  245 +
  246 + TP_fast_assign(
  247 + __entry->index = page->index;
  248 + __entry->ino = page->mapping->host->i_ino;
  249 + __entry->dev = page->mapping->host->i_sb->s_dev;
  250 + ),
  251 +
  252 + TP_printk("dev %d,%d ino %lu page_index %lu",
  253 + MAJOR(__entry->dev), MINOR(__entry->dev),
  254 + (unsigned long) __entry->ino, __entry->index)
  255 +);
  256 +
  257 +DEFINE_EVENT(ext3__page_op, ext3_ordered_writepage,
  258 +
  259 + TP_PROTO(struct page *page),
  260 +
  261 + TP_ARGS(page)
  262 +);
  263 +
  264 +DEFINE_EVENT(ext3__page_op, ext3_writeback_writepage,
  265 +
  266 + TP_PROTO(struct page *page),
  267 +
  268 + TP_ARGS(page)
  269 +);
  270 +
  271 +DEFINE_EVENT(ext3__page_op, ext3_journalled_writepage,
  272 +
  273 + TP_PROTO(struct page *page),
  274 +
  275 + TP_ARGS(page)
  276 +);
  277 +
  278 +DEFINE_EVENT(ext3__page_op, ext3_readpage,
  279 +
  280 + TP_PROTO(struct page *page),
  281 +
  282 + TP_ARGS(page)
  283 +);
  284 +
  285 +DEFINE_EVENT(ext3__page_op, ext3_releasepage,
  286 +
  287 + TP_PROTO(struct page *page),
  288 +
  289 + TP_ARGS(page)
  290 +);
  291 +
  292 +TRACE_EVENT(ext3_invalidatepage,
  293 + TP_PROTO(struct page *page, unsigned long offset),
  294 +
  295 + TP_ARGS(page, offset),
  296 +
  297 + TP_STRUCT__entry(
  298 + __field( pgoff_t, index )
  299 + __field( unsigned long, offset )
  300 + __field( ino_t, ino )
  301 + __field( dev_t, dev )
  302 +
  303 + ),
  304 +
  305 + TP_fast_assign(
  306 + __entry->index = page->index;
  307 + __entry->offset = offset;
  308 + __entry->ino = page->mapping->host->i_ino;
  309 + __entry->dev = page->mapping->host->i_sb->s_dev;
  310 + ),
  311 +
  312 + TP_printk("dev %d,%d ino %lu page_index %lu offset %lu",
  313 + MAJOR(__entry->dev), MINOR(__entry->dev),
  314 + (unsigned long) __entry->ino,
  315 + __entry->index, __entry->offset)
  316 +);
  317 +
  318 +TRACE_EVENT(ext3_discard_blocks,
  319 + TP_PROTO(struct super_block *sb, unsigned long blk,
  320 + unsigned long count),
  321 +
  322 + TP_ARGS(sb, blk, count),
  323 +
  324 + TP_STRUCT__entry(
  325 + __field( dev_t, dev )
  326 + __field( unsigned long, blk )
  327 + __field( unsigned long, count )
  328 +
  329 + ),
  330 +
  331 + TP_fast_assign(
  332 + __entry->dev = sb->s_dev;
  333 + __entry->blk = blk;
  334 + __entry->count = count;
  335 + ),
  336 +
  337 + TP_printk("dev %d,%d blk %lu count %lu",
  338 + MAJOR(__entry->dev), MINOR(__entry->dev),
  339 + __entry->blk, __entry->count)
  340 +);
  341 +
  342 +TRACE_EVENT(ext3_request_blocks,
  343 + TP_PROTO(struct inode *inode, unsigned long goal,
  344 + unsigned long count),
  345 +
  346 + TP_ARGS(inode, goal, count),
  347 +
  348 + TP_STRUCT__entry(
  349 + __field( dev_t, dev )
  350 + __field( ino_t, ino )
  351 + __field( unsigned long, count )
  352 + __field( unsigned long, goal )
  353 + ),
  354 +
  355 + TP_fast_assign(
  356 + __entry->dev = inode->i_sb->s_dev;
  357 + __entry->ino = inode->i_ino;
  358 + __entry->count = count;
  359 + __entry->goal = goal;
  360 + ),
  361 +
  362 + TP_printk("dev %d,%d ino %lu count %lu goal %lu",
  363 + MAJOR(__entry->dev), MINOR(__entry->dev),
  364 + (unsigned long) __entry->ino,
  365 + __entry->count, __entry->goal)
  366 +);
  367 +
  368 +TRACE_EVENT(ext3_allocate_blocks,
  369 + TP_PROTO(struct inode *inode, unsigned long goal,
  370 + unsigned long count, unsigned long block),
  371 +
  372 + TP_ARGS(inode, goal, count, block),
  373 +
  374 + TP_STRUCT__entry(
  375 + __field( dev_t, dev )
  376 + __field( ino_t, ino )
  377 + __field( unsigned long, block )
  378 + __field( unsigned long, count )
  379 + __field( unsigned long, goal )
  380 + ),
  381 +
  382 + TP_fast_assign(
  383 + __entry->dev = inode->i_sb->s_dev;
  384 + __entry->ino = inode->i_ino;
  385 + __entry->block = block;
  386 + __entry->count = count;
  387 + __entry->goal = goal;
  388 + ),
  389 +
  390 + TP_printk("dev %d,%d ino %lu count %lu block %lu goal %lu",
  391 + MAJOR(__entry->dev), MINOR(__entry->dev),
  392 + (unsigned long) __entry->ino,
  393 + __entry->count, __entry->block,
  394 + __entry->goal)
  395 +);
  396 +
  397 +TRACE_EVENT(ext3_free_blocks,
  398 + TP_PROTO(struct inode *inode, unsigned long block,
  399 + unsigned long count),
  400 +
  401 + TP_ARGS(inode, block, count),
  402 +
  403 + TP_STRUCT__entry(
  404 + __field( dev_t, dev )
  405 + __field( ino_t, ino )
  406 + __field( umode_t, mode )
  407 + __field( unsigned long, block )
  408 + __field( unsigned long, count )
  409 + ),
  410 +
  411 + TP_fast_assign(
  412 + __entry->dev = inode->i_sb->s_dev;
  413 + __entry->ino = inode->i_ino;
  414 + __entry->mode = inode->i_mode;
  415 + __entry->block = block;
  416 + __entry->count = count;
  417 + ),
  418 +
  419 + TP_printk("dev %d,%d ino %lu mode 0%o block %lu count %lu",
  420 + MAJOR(__entry->dev), MINOR(__entry->dev),
  421 + (unsigned long) __entry->ino,
  422 + __entry->mode, __entry->block, __entry->count)
  423 +);
  424 +
  425 +TRACE_EVENT(ext3_sync_file_enter,
  426 + TP_PROTO(struct file *file, int datasync),
  427 +
  428 + TP_ARGS(file, datasync),
  429 +
  430 + TP_STRUCT__entry(
  431 + __field( dev_t, dev )
  432 + __field( ino_t, ino )
  433 + __field( ino_t, parent )
  434 + __field( int, datasync )
  435 + ),
  436 +
  437 + TP_fast_assign(
  438 + struct dentry *dentry = file->f_path.dentry;
  439 +
  440 + __entry->dev = dentry->d_inode->i_sb->s_dev;
  441 + __entry->ino = dentry->d_inode->i_ino;
  442 + __entry->datasync = datasync;
  443 + __entry->parent = dentry->d_parent->d_inode->i_ino;
  444 + ),
  445 +
  446 + TP_printk("dev %d,%d ino %lu parent %lu datasync %d",
  447 + MAJOR(__entry->dev), MINOR(__entry->dev),
  448 + (unsigned long) __entry->ino,
  449 + (unsigned long) __entry->parent, __entry->datasync)
  450 +);
  451 +
  452 +TRACE_EVENT(ext3_sync_file_exit,
  453 + TP_PROTO(struct inode *inode, int ret),
  454 +
  455 + TP_ARGS(inode, ret),
  456 +
  457 + TP_STRUCT__entry(
  458 + __field( int, ret )
  459 + __field( ino_t, ino )
  460 + __field( dev_t, dev )
  461 + ),
  462 +
  463 + TP_fast_assign(
  464 + __entry->ret = ret;
  465 + __entry->ino = inode->i_ino;
  466 + __entry->dev = inode->i_sb->s_dev;
  467 + ),
  468 +
  469 + TP_printk("dev %d,%d ino %lu ret %d",
  470 + MAJOR(__entry->dev), MINOR(__entry->dev),
  471 + (unsigned long) __entry->ino,
  472 + __entry->ret)
  473 +);
  474 +
  475 +TRACE_EVENT(ext3_sync_fs,
  476 + TP_PROTO(struct super_block *sb, int wait),
  477 +
  478 + TP_ARGS(sb, wait),
  479 +
  480 + TP_STRUCT__entry(
  481 + __field( dev_t, dev )
  482 + __field( int, wait )
  483 +
  484 + ),
  485 +
  486 + TP_fast_assign(
  487 + __entry->dev = sb->s_dev;
  488 + __entry->wait = wait;
  489 + ),
  490 +
  491 + TP_printk("dev %d,%d wait %d",
  492 + MAJOR(__entry->dev), MINOR(__entry->dev),
  493 + __entry->wait)
  494 +);
  495 +
  496 +TRACE_EVENT(ext3_rsv_window_add,
  497 + TP_PROTO(struct super_block *sb,
  498 + struct ext3_reserve_window_node *rsv_node),
  499 +
  500 + TP_ARGS(sb, rsv_node),
  501 +
  502 + TP_STRUCT__entry(
  503 + __field( unsigned long, start )
  504 + __field( unsigned long, end )
  505 + __field( dev_t, dev )
  506 + ),
  507 +
  508 + TP_fast_assign(
  509 + __entry->dev = sb->s_dev;
  510 + __entry->start = rsv_node->rsv_window._rsv_start;
  511 + __entry->end = rsv_node->rsv_window._rsv_end;
  512 + ),
  513 +
  514 + TP_printk("dev %d,%d start %lu end %lu",
  515 + MAJOR(__entry->dev), MINOR(__entry->dev),
  516 + __entry->start, __entry->end)
  517 +);
  518 +
  519 +TRACE_EVENT(ext3_discard_reservation,
  520 + TP_PROTO(struct inode *inode,
  521 + struct ext3_reserve_window_node *rsv_node),
  522 +
  523 + TP_ARGS(inode, rsv_node),
  524 +
  525 + TP_STRUCT__entry(
  526 + __field( unsigned long, start )
  527 + __field( unsigned long, end )
  528 + __field( ino_t, ino )
  529 + __field( dev_t, dev )
  530 + ),
  531 +
  532 + TP_fast_assign(
  533 + __entry->start = rsv_node->rsv_window._rsv_start;
  534 + __entry->end = rsv_node->rsv_window._rsv_end;
  535 + __entry->ino = inode->i_ino;
  536 + __entry->dev = inode->i_sb->s_dev;
  537 + ),
  538 +
  539 + TP_printk("dev %d,%d ino %lu start %lu end %lu",
  540 + MAJOR(__entry->dev), MINOR(__entry->dev),
  541 + (unsigned long)__entry->ino, __entry->start,
  542 + __entry->end)
  543 +);
  544 +
  545 +TRACE_EVENT(ext3_alloc_new_reservation,
  546 + TP_PROTO(struct super_block *sb, unsigned long goal),
  547 +
  548 + TP_ARGS(sb, goal),
  549 +
  550 + TP_STRUCT__entry(
  551 + __field( dev_t, dev )
  552 + __field( unsigned long, goal )
  553 + ),
  554 +
  555 + TP_fast_assign(
  556 + __entry->dev = sb->s_dev;
  557 + __entry->goal = goal;
  558 + ),
  559 +
  560 + TP_printk("dev %d,%d goal %lu",
  561 + MAJOR(__entry->dev), MINOR(__entry->dev),
  562 + __entry->goal)
  563 +);
  564 +
  565 +TRACE_EVENT(ext3_reserved,
  566 + TP_PROTO(struct super_block *sb, unsigned long block,
  567 + struct ext3_reserve_window_node *rsv_node),
  568 +
  569 + TP_ARGS(sb, block, rsv_node),
  570 +
  571 + TP_STRUCT__entry(
  572 + __field( unsigned long, block )
  573 + __field( unsigned long, start )
  574 + __field( unsigned long, end )
  575 + __field( dev_t, dev )
  576 + ),
  577 +
  578 + TP_fast_assign(
  579 + __entry->block = block;
  580 + __entry->start = rsv_node->rsv_window._rsv_start;
  581 + __entry->end = rsv_node->rsv_window._rsv_end;
  582 + __entry->dev = sb->s_dev;
  583 + ),
  584 +
  585 + TP_printk("dev %d,%d block %lu start %lu end %lu",
  586 + MAJOR(__entry->dev), MINOR(__entry->dev),
  587 + __entry->block, __entry->start, __entry->end)
  588 +);
  589 +
  590 +TRACE_EVENT(ext3_forget,
  591 + TP_PROTO(struct inode *inode, int is_metadata, unsigned long block),
  592 +
  593 + TP_ARGS(inode, is_metadata, block),
  594 +
  595 + TP_STRUCT__entry(
  596 + __field( dev_t, dev )
  597 + __field( ino_t, ino )
  598 + __field( umode_t, mode )
  599 + __field( int, is_metadata )
  600 + __field( unsigned long, block )
  601 + ),
  602 +
  603 + TP_fast_assign(
  604 + __entry->dev = inode->i_sb->s_dev;
  605 + __entry->ino = inode->i_ino;
  606 + __entry->mode = inode->i_mode;
  607 + __entry->is_metadata = is_metadata;
  608 + __entry->block = block;
  609 + ),
  610 +
  611 + TP_printk("dev %d,%d ino %lu mode 0%o is_metadata %d block %lu",
  612 + MAJOR(__entry->dev), MINOR(__entry->dev),
  613 + (unsigned long) __entry->ino,
  614 + __entry->mode, __entry->is_metadata, __entry->block)
  615 +);
  616 +
  617 +TRACE_EVENT(ext3_read_block_bitmap,
  618 + TP_PROTO(struct super_block *sb, unsigned int group),
  619 +
  620 + TP_ARGS(sb, group),
  621 +
  622 + TP_STRUCT__entry(
  623 + __field( dev_t, dev )
  624 + __field( __u32, group )
  625 +
  626 + ),
  627 +
  628 + TP_fast_assign(
  629 + __entry->dev = sb->s_dev;
  630 + __entry->group = group;
  631 + ),
  632 +
  633 + TP_printk("dev %d,%d group %u",
  634 + MAJOR(__entry->dev), MINOR(__entry->dev),
  635 + __entry->group)
  636 +);
  637 +
  638 +TRACE_EVENT(ext3_direct_IO_enter,
  639 + TP_PROTO(struct inode *inode, loff_t offset, unsigned long len, int rw),
  640 +
  641 + TP_ARGS(inode, offset, len, rw),
  642 +
  643 + TP_STRUCT__entry(
  644 + __field( ino_t, ino )
  645 + __field( dev_t, dev )
  646 + __field( loff_t, pos )
  647 + __field( unsigned long, len )
  648 + __field( int, rw )
  649 + ),
  650 +
  651 + TP_fast_assign(
  652 + __entry->ino = inode->i_ino;
  653 + __entry->dev = inode->i_sb->s_dev;
  654 + __entry->pos = offset;
  655 + __entry->len = len;
  656 + __entry->rw = rw;
  657 + ),
  658 +
  659 + TP_printk("dev %d,%d ino %lu pos %llu len %lu rw %d",
  660 + MAJOR(__entry->dev), MINOR(__entry->dev),
  661 + (unsigned long) __entry->ino,
  662 + (unsigned long long) __entry->pos, __entry->len,
  663 + __entry->rw)
  664 +);
  665 +
  666 +TRACE_EVENT(ext3_direct_IO_exit,
  667 + TP_PROTO(struct inode *inode, loff_t offset, unsigned long len,
  668 + int rw, int ret),
  669 +
  670 + TP_ARGS(inode, offset, len, rw, ret),
  671 +
  672 + TP_STRUCT__entry(
  673 + __field( ino_t, ino )
  674 + __field( dev_t, dev )
  675 + __field( loff_t, pos )
  676 + __field( unsigned long, len )
  677 + __field( int, rw )
  678 + __field( int, ret )
  679 + ),
  680 +
  681 + TP_fast_assign(
  682 + __entry->ino = inode->i_ino;
  683 + __entry->dev = inode->i_sb->s_dev;
  684 + __entry->pos = offset;
  685 + __entry->len = len;
  686 + __entry->rw = rw;
  687 + __entry->ret = ret;
  688 + ),
  689 +
  690 + TP_printk("dev %d,%d ino %lu pos %llu len %lu rw %d ret %d",
  691 + MAJOR(__entry->dev), MINOR(__entry->dev),
  692 + (unsigned long) __entry->ino,
  693 + (unsigned long long) __entry->pos, __entry->len,
  694 + __entry->rw, __entry->ret)
  695 +);
  696 +
  697 +TRACE_EVENT(ext3_unlink_enter,
  698 + TP_PROTO(struct inode *parent, struct dentry *dentry),
  699 +
  700 + TP_ARGS(parent, dentry),
  701 +
  702 + TP_STRUCT__entry(
  703 + __field( ino_t, parent )
  704 + __field( ino_t, ino )
  705 + __field( loff_t, size )
  706 + __field( dev_t, dev )
  707 + ),
  708 +
  709 + TP_fast_assign(
  710 + __entry->parent = parent->i_ino;
  711 + __entry->ino = dentry->d_inode->i_ino;
  712 + __entry->size = dentry->d_inode->i_size;
  713 + __entry->dev = dentry->d_inode->i_sb->s_dev;
  714 + ),
  715 +
  716 + TP_printk("dev %d,%d ino %lu size %lld parent %lu",
  717 + MAJOR(__entry->dev), MINOR(__entry->dev),
  718 + (unsigned long) __entry->ino,
  719 + (unsigned long long)__entry->size,
  720 + (unsigned long) __entry->parent)
  721 +);
  722 +
  723 +TRACE_EVENT(ext3_unlink_exit,
  724 + TP_PROTO(struct dentry *dentry, int ret),
  725 +
  726 + TP_ARGS(dentry, ret),
  727 +
  728 + TP_STRUCT__entry(
  729 + __field( ino_t, ino )
  730 + __field( dev_t, dev )
  731 + __field( int, ret )
  732 + ),
  733 +
  734 + TP_fast_assign(
  735 + __entry->ino = dentry->d_inode->i_ino;
  736 + __entry->dev = dentry->d_inode->i_sb->s_dev;
  737 + __entry->ret = ret;
  738 + ),
  739 +
  740 + TP_printk("dev %d,%d ino %lu ret %d",
  741 + MAJOR(__entry->dev), MINOR(__entry->dev),
  742 + (unsigned long) __entry->ino,
  743 + __entry->ret)
  744 +);
  745 +
  746 +DECLARE_EVENT_CLASS(ext3__truncate,
  747 + TP_PROTO(struct inode *inode),
  748 +
  749 + TP_ARGS(inode),
  750 +
  751 + TP_STRUCT__entry(
  752 + __field( ino_t, ino )
  753 + __field( dev_t, dev )
  754 + __field( blkcnt_t, blocks )
  755 + ),
  756 +
  757 + TP_fast_assign(
  758 + __entry->ino = inode->i_ino;
  759 + __entry->dev = inode->i_sb->s_dev;
  760 + __entry->blocks = inode->i_blocks;
  761 + ),
  762 +
  763 + TP_printk("dev %d,%d ino %lu blocks %lu",
  764 + MAJOR(__entry->dev), MINOR(__entry->dev),
  765 + (unsigned long) __entry->ino, (unsigned long) __entry->blocks)
  766 +);
  767 +
  768 +DEFINE_EVENT(ext3__truncate, ext3_truncate_enter,
  769 +
  770 + TP_PROTO(struct inode *inode),
  771 +
  772 + TP_ARGS(inode)
  773 +);
  774 +
  775 +DEFINE_EVENT(ext3__truncate, ext3_truncate_exit,
  776 +
  777 + TP_PROTO(struct inode *inode),
  778 +
  779 + TP_ARGS(inode)
  780 +);
  781 +
  782 +TRACE_EVENT(ext3_get_blocks_enter,
  783 + TP_PROTO(struct inode *inode, unsigned long lblk,
  784 + unsigned long len, int create),
  785 +
  786 + TP_ARGS(inode, lblk, len, create),
  787 +
  788 + TP_STRUCT__entry(
  789 + __field( ino_t, ino )
  790 + __field( dev_t, dev )
  791 + __field( unsigned long, lblk )
  792 + __field( unsigned long, len )
  793 + __field( int, create )
  794 + ),
  795 +
  796 + TP_fast_assign(
  797 + __entry->ino = inode->i_ino;
  798 + __entry->dev = inode->i_sb->s_dev;
  799 + __entry->lblk = lblk;
  800 + __entry->len = len;
  801 + __entry->create = create;
  802 + ),
  803 +
  804 + TP_printk("dev %d,%d ino %lu lblk %lu len %lu create %u",
  805 + MAJOR(__entry->dev), MINOR(__entry->dev),
  806 + (unsigned long) __entry->ino,
  807 + __entry->lblk, __entry->len, __entry->create)
  808 +);
  809 +
  810 +TRACE_EVENT(ext3_get_blocks_exit,
  811 + TP_PROTO(struct inode *inode, unsigned long lblk,
  812 + unsigned long pblk, unsigned long len, int ret),
  813 +
  814 + TP_ARGS(inode, lblk, pblk, len, ret),
  815 +
  816 + TP_STRUCT__entry(
  817 + __field( ino_t, ino )
  818 + __field( dev_t, dev )
  819 + __field( unsigned long, lblk )
  820 + __field( unsigned long, pblk )
  821 + __field( unsigned long, len )
  822 + __field( int, ret )
  823 + ),
  824 +
  825 + TP_fast_assign(
  826 + __entry->ino = inode->i_ino;
  827 + __entry->dev = inode->i_sb->s_dev;
  828 + __entry->lblk = lblk;
  829 + __entry->pblk = pblk;
  830 + __entry->len = len;
  831 + __entry->ret = ret;
  832 + ),
  833 +
  834 + TP_printk("dev %d,%d ino %lu lblk %lu pblk %lu len %lu ret %d",
  835 + MAJOR(__entry->dev), MINOR(__entry->dev),
  836 + (unsigned long) __entry->ino,
  837 + __entry->lblk, __entry->pblk,
  838 + __entry->len, __entry->ret)
  839 +);
  840 +
  841 +TRACE_EVENT(ext3_load_inode,
  842 + TP_PROTO(struct inode *inode),
  843 +
  844 + TP_ARGS(inode),
  845 +
  846 + TP_STRUCT__entry(
  847 + __field( ino_t, ino )
  848 + __field( dev_t, dev )
  849 + ),
  850 +
  851 + TP_fast_assign(
  852 + __entry->ino = inode->i_ino;
  853 + __entry->dev = inode->i_sb->s_dev;
  854 + ),
  855 +
  856 + TP_printk("dev %d,%d ino %lu",
  857 + MAJOR(__entry->dev), MINOR(__entry->dev),
  858 + (unsigned long) __entry->ino)
  859 +);
  860 +
  861 +#endif /* _TRACE_EXT3_H */
  862 +
  863 +/* This part must be outside protection */
  864 +#include <trace/define_trace.h>
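Each TRACE_EVENT() above expands to a trace_<name>() inline that is a near-free no-op until the event is enabled at runtime. Exactly one compilation unit must define CREATE_TRACE_POINTS before including the header to emit the event bodies; a minimal sketch (the call site is illustrative, typically the filesystem's super.c):

	/* In exactly one fs/ext3 source file: */
	#define CREATE_TRACE_POINTS
	#include <trace/events/ext3.h>

	/* Any other file just includes the header and calls, e.g.: */
	static void example_hook(struct inode *inode)
	{
		trace_ext3_free_inode(inode);	/* fires only when enabled */
	}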
include/trace/events/jbd.h
  1 +#undef TRACE_SYSTEM
  2 +#define TRACE_SYSTEM jbd
  3 +
  4 +#if !defined(_TRACE_JBD_H) || defined(TRACE_HEADER_MULTI_READ)
  5 +#define _TRACE_JBD_H
  6 +
  7 +#include <linux/jbd.h>
  8 +#include <linux/tracepoint.h>
  9 +
  10 +TRACE_EVENT(jbd_checkpoint,
  11 +
  12 + TP_PROTO(journal_t *journal, int result),
  13 +
  14 + TP_ARGS(journal, result),
  15 +
  16 + TP_STRUCT__entry(
  17 + __field( dev_t, dev )
  18 + __field( int, result )
  19 + ),
  20 +
  21 + TP_fast_assign(
  22 + __entry->dev = journal->j_fs_dev->bd_dev;
  23 + __entry->result = result;
  24 + ),
  25 +
  26 + TP_printk("dev %d,%d result %d",
  27 + MAJOR(__entry->dev), MINOR(__entry->dev),
  28 + __entry->result)
  29 +);
  30 +
  31 +DECLARE_EVENT_CLASS(jbd_commit,
  32 +
  33 + TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
  34 +
  35 + TP_ARGS(journal, commit_transaction),
  36 +
  37 + TP_STRUCT__entry(
  38 + __field( dev_t, dev )
  39 + __field( char, sync_commit )
  40 + __field( int, transaction )
  41 + ),
  42 +
  43 + TP_fast_assign(
  44 + __entry->dev = journal->j_fs_dev->bd_dev;
  45 + __entry->sync_commit = commit_transaction->t_synchronous_commit;
  46 + __entry->transaction = commit_transaction->t_tid;
  47 + ),
  48 +
  49 + TP_printk("dev %d,%d transaction %d sync %d",
  50 + MAJOR(__entry->dev), MINOR(__entry->dev),
  51 + __entry->transaction, __entry->sync_commit)
  52 +);
  53 +
  54 +DEFINE_EVENT(jbd_commit, jbd_start_commit,
  55 +
  56 + TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
  57 +
  58 + TP_ARGS(journal, commit_transaction)
  59 +);
  60 +
  61 +DEFINE_EVENT(jbd_commit, jbd_commit_locking,
  62 +
  63 + TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
  64 +
  65 + TP_ARGS(journal, commit_transaction)
  66 +);
  67 +
  68 +DEFINE_EVENT(jbd_commit, jbd_commit_flushing,
  69 +
  70 + TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
  71 +
  72 + TP_ARGS(journal, commit_transaction)
  73 +);
  74 +
  75 +DEFINE_EVENT(jbd_commit, jbd_commit_logging,
  76 +
  77 + TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
  78 +
  79 + TP_ARGS(journal, commit_transaction)
  80 +);
  81 +
  82 +TRACE_EVENT(jbd_drop_transaction,
  83 +
  84 + TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
  85 +
  86 + TP_ARGS(journal, commit_transaction),
  87 +
  88 + TP_STRUCT__entry(
  89 + __field( dev_t, dev )
  90 + __field( char, sync_commit )
  91 + __field( int, transaction )
  92 + ),
  93 +
  94 + TP_fast_assign(
  95 + __entry->dev = journal->j_fs_dev->bd_dev;
  96 + __entry->sync_commit = commit_transaction->t_synchronous_commit;
  97 + __entry->transaction = commit_transaction->t_tid;
  98 + ),
  99 +
  100 + TP_printk("dev %d,%d transaction %d sync %d",
  101 + MAJOR(__entry->dev), MINOR(__entry->dev),
  102 + __entry->transaction, __entry->sync_commit)
  103 +);
  104 +
  105 +TRACE_EVENT(jbd_end_commit,
  106 + TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
  107 +
  108 + TP_ARGS(journal, commit_transaction),
  109 +
  110 + TP_STRUCT__entry(
  111 + __field( dev_t, dev )
  112 + __field( char, sync_commit )
  113 + __field( int, transaction )
  114 + __field( int, head )
  115 + ),
  116 +
  117 + TP_fast_assign(
  118 + __entry->dev = journal->j_fs_dev->bd_dev;
  119 + __entry->sync_commit = commit_transaction->t_synchronous_commit;
  120 + __entry->transaction = commit_transaction->t_tid;
  121 + __entry->head = journal->j_tail_sequence;
  122 + ),
  123 +
  124 + TP_printk("dev %d,%d transaction %d sync %d head %d",
  125 + MAJOR(__entry->dev), MINOR(__entry->dev),
  126 + __entry->transaction, __entry->sync_commit, __entry->head)
  127 +);
  128 +
  129 +TRACE_EVENT(jbd_do_submit_data,
  130 + TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
  131 +
  132 + TP_ARGS(journal, commit_transaction),
  133 +
  134 + TP_STRUCT__entry(
  135 + __field( dev_t, dev )
  136 + __field( char, sync_commit )
  137 + __field( int, transaction )
  138 + ),
  139 +
  140 + TP_fast_assign(
  141 + __entry->dev = journal->j_fs_dev->bd_dev;
  142 + __entry->sync_commit = commit_transaction->t_synchronous_commit;
  143 + __entry->transaction = commit_transaction->t_tid;
  144 + ),
  145 +
  146 + TP_printk("dev %d,%d transaction %d sync %d",
  147 + MAJOR(__entry->dev), MINOR(__entry->dev),
  148 + __entry->transaction, __entry->sync_commit)
  149 +);
  150 +
  151 +TRACE_EVENT(jbd_cleanup_journal_tail,
  152 +
  153 + TP_PROTO(journal_t *journal, tid_t first_tid,
  154 + unsigned long block_nr, unsigned long freed),
  155 +
  156 + TP_ARGS(journal, first_tid, block_nr, freed),
  157 +
  158 + TP_STRUCT__entry(
  159 + __field( dev_t, dev )
  160 + __field( tid_t, tail_sequence )
  161 + __field( tid_t, first_tid )
  162 + __field(unsigned long, block_nr )
  163 + __field(unsigned long, freed )
  164 + ),
  165 +
  166 + TP_fast_assign(
  167 + __entry->dev = journal->j_fs_dev->bd_dev;
  168 + __entry->tail_sequence = journal->j_tail_sequence;
  169 + __entry->first_tid = first_tid;
  170 + __entry->block_nr = block_nr;
  171 + __entry->freed = freed;
  172 + ),
  173 +
  174 + TP_printk("dev %d,%d from %u to %u offset %lu freed %lu",
  175 + MAJOR(__entry->dev), MINOR(__entry->dev),
  176 + __entry->tail_sequence, __entry->first_tid,
  177 + __entry->block_nr, __entry->freed)
  178 +);
  179 +
  180 +TRACE_EVENT(jbd_update_superblock_end,
  181 + TP_PROTO(journal_t *journal, int wait),
  182 +
  183 + TP_ARGS(journal, wait),
  184 +
  185 + TP_STRUCT__entry(
  186 + __field( dev_t, dev )
  187 + __field( int, wait )
  188 + ),
  189 +
  190 + TP_fast_assign(
  191 + __entry->dev = journal->j_fs_dev->bd_dev;
  192 + __entry->wait = wait;
  193 + ),
  194 +
  195 + TP_printk("dev %d,%d wait %d",
  196 + MAJOR(__entry->dev), MINOR(__entry->dev),
  197 + __entry->wait)
  198 +);
  199 +
  200 +#endif /* _TRACE_JBD_H */
  201 +
  202 +/* This part must be outside protection */
  203 +#include <trace/define_trace.h>
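The jbd events are consumed the same way from fs/jbd. A hedged sketch of a call site (the wrapper function is hypothetical; log_do_checkpoint() is the real jbd checkpoint helper):

	#include <trace/events/jbd.h>

	/* Run one checkpoint pass and record its result. */
	static int example_checkpoint(journal_t *journal)
	{
		int result = log_do_checkpoint(journal);

		trace_jbd_checkpoint(journal, result);	/* logs dev + result */
		return result;
	}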