Commit e06c8227fd94ec181849ba206bf032be31c4295c

Authored by Joel Becker
Committed by Mark Fasheh
1 parent 754938c142

jbd2: Add buffer triggers

Filesystems often need to perform a compute-intensive operation on some
metadata.  If this operation is repeated many times, it can be very
expensive.  It would be much nicer if the operation could be performed
once before a buffer goes to disk.

This adds triggers to jbd2 buffer heads.  Just before writing a metadata
buffer to the journal, jbd2 will optionally call a commit trigger associated
with the buffer.  If the journal is aborted, an abort trigger will be
called on any dirty buffers as they are dropped from pending
transactions.

ocfs2 will use this feature.

Initially I tried to come up with a more generic trigger that could be
used for non-buffer-related events like transaction completion.  The two
don't tie together nicely, because the information a buffer trigger needs
(specific to a journal_head) isn't the same as what a transaction
trigger needs (specific to a transaction_t or perhaps journal_t).  So I
implemented a buffer trigger set, with the understanding that
journal/transaction wide triggers should be implemented separately.

There is only one trigger set allowed per buffer.  I can't think of any
reason to attach more than one set.  Contrast this with a journal or
transaction in which multiple places may want to watch the entire
transaction separately.

The trigger sets are considered statically allocated from the jbd2
perspective.  ocfs2 will just have one trigger set per block type,
setting the same set on every bh of the same type.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

Showing 5 changed files with 114 additions and 0 deletions Side-by-side Diff

... ... @@ -509,6 +509,10 @@
509 509 if (is_journal_aborted(journal)) {
510 510 clear_buffer_jbddirty(jh2bh(jh));
511 511 JBUFFER_TRACE(jh, "journal is aborting: refile");
  512 + jbd2_buffer_abort_trigger(jh,
  513 + jh->b_frozen_data ?
  514 + jh->b_frozen_triggers :
  515 + jh->b_triggers);
512 516 jbd2_journal_refile_buffer(journal, jh);
513 517 /* If that was the last one, we need to clean up
514 518 * any descriptor buffers which may have been
... ... @@ -844,6 +848,9 @@
844 848 * data.
845 849 *
846 850 * Otherwise, we can just throw away the frozen data now.
  851 + *
  852 + * We also know that the frozen data has already fired
  853 + * its triggers if they exist, so we can clear that too.
847 854 */
848 855 if (jh->b_committed_data) {
849 856 jbd2_free(jh->b_committed_data, bh->b_size);
850 857  
... ... @@ -851,10 +858,12 @@
851 858 if (jh->b_frozen_data) {
852 859 jh->b_committed_data = jh->b_frozen_data;
853 860 jh->b_frozen_data = NULL;
  861 + jh->b_frozen_triggers = NULL;
854 862 }
855 863 } else if (jh->b_frozen_data) {
856 864 jbd2_free(jh->b_frozen_data, bh->b_size);
857 865 jh->b_frozen_data = NULL;
  866 + jh->b_frozen_triggers = NULL;
858 867 }
859 868  
860 869 spin_lock(&journal->j_list_lock);
... ... @@ -50,6 +50,7 @@
50 50 EXPORT_SYMBOL(jbd2_journal_get_write_access);
51 51 EXPORT_SYMBOL(jbd2_journal_get_create_access);
52 52 EXPORT_SYMBOL(jbd2_journal_get_undo_access);
  53 +EXPORT_SYMBOL(jbd2_journal_set_triggers);
53 54 EXPORT_SYMBOL(jbd2_journal_dirty_metadata);
54 55 EXPORT_SYMBOL(jbd2_journal_release_buffer);
55 56 EXPORT_SYMBOL(jbd2_journal_forget);
... ... @@ -290,6 +291,7 @@
290 291 struct page *new_page;
291 292 unsigned int new_offset;
292 293 struct buffer_head *bh_in = jh2bh(jh_in);
  294 + struct jbd2_buffer_trigger_type *triggers;
293 295  
294 296 /*
295 297 * The buffer really shouldn't be locked: only the current committing
296 298  
297 299  
... ... @@ -314,13 +316,23 @@
314 316 done_copy_out = 1;
315 317 new_page = virt_to_page(jh_in->b_frozen_data);
316 318 new_offset = offset_in_page(jh_in->b_frozen_data);
  319 + triggers = jh_in->b_frozen_triggers;
317 320 } else {
318 321 new_page = jh2bh(jh_in)->b_page;
319 322 new_offset = offset_in_page(jh2bh(jh_in)->b_data);
  323 + triggers = jh_in->b_triggers;
320 324 }
321 325  
322 326 mapped_data = kmap_atomic(new_page, KM_USER0);
323 327 /*
  328 + * Fire any commit trigger. Do this before checking for escaping,
  329 + * as the trigger may modify the magic offset. If a copy-out
  330 + * happens afterwards, it will have the correct data in the buffer.
  331 + */
  332 + jbd2_buffer_commit_trigger(jh_in, mapped_data + new_offset,
  333 + triggers);
  334 +
  335 + /*
324 336 * Check for escaping
325 337 */
326 338 if (*((__be32 *)(mapped_data + new_offset)) ==
... ... @@ -352,6 +364,13 @@
352 364 new_page = virt_to_page(tmp);
353 365 new_offset = offset_in_page(tmp);
354 366 done_copy_out = 1;
  367 +
  368 + /*
  369 + * This isn't strictly necessary, as we're using frozen
  370 + * data for the escaping, but it keeps consistency with
  371 + * b_frozen_data usage.
  372 + */
  373 + jh_in->b_frozen_triggers = jh_in->b_triggers;
355 374 }
356 375  
357 376 /*
fs/jbd2/transaction.c
... ... @@ -741,6 +741,12 @@
741 741 source = kmap_atomic(page, KM_USER0);
742 742 memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
743 743 kunmap_atomic(source, KM_USER0);
  744 +
  745 + /*
  746 + * Now that the frozen data is saved off, we need to store
  747 + * any matching triggers.
  748 + */
  749 + jh->b_frozen_triggers = jh->b_triggers;
744 750 }
745 751 jbd_unlock_bh_state(bh);
746 752  
... ... @@ -942,6 +948,47 @@
942 948 jbd2_free(committed_data, bh->b_size);
943 949 return err;
944 950 }
  951 +
  952 +/**
  953 + * void jbd2_journal_set_triggers() - Add triggers for commit writeout
  954 + * @bh: buffer to trigger on; the set is stored on its journal_head
  955 + * @type: struct jbd2_buffer_trigger_type containing the trigger(s).
  956 + *
  957 + * Store @type as the b_triggers of this buffer's journal_head. This is
  958 + * always safe, because triggers for a committing buffer will be saved off
  959 + * (in b_frozen_triggers), and triggers for a running transaction will
  960 + * match the buffer in that transaction. Only the pointer is stored.
  961 + * Call with NULL to clear the triggers.
  962 + */
  963 +void jbd2_journal_set_triggers(struct buffer_head *bh,
  964 + struct jbd2_buffer_trigger_type *type)
  965 +{
  966 + struct journal_head *jh = bh2jh(bh); /* NOTE(review): no NULL check here - presumably bh must already have a journal_head attached; confirm against callers */
  967 +
  968 + jh->b_triggers = type; /* replaces any previously set trigger set */
  969 +}
  970 +/* Fire the t_commit hook of @triggers, if there is one, for the buffer behind @jh. */
  971 +void jbd2_buffer_commit_trigger(struct journal_head *jh, void *mapped_data,
  972 + struct jbd2_buffer_trigger_type *triggers)
  973 +{
  974 + struct buffer_head *bh = jh2bh(jh);
  975 +
  976 + if (!triggers || !triggers->t_commit) /* no trigger set, or the set has no commit hook */
  977 + return;
  978 +
  979 + triggers->t_commit(triggers, bh, mapped_data, bh->b_size); /* the hook gets the set itself, the bh, the mapped data and the buffer size */
  980 +}
  981 +/* Fire the t_abort hook of @triggers, if there is one, for the buffer behind @jh. */
  982 +void jbd2_buffer_abort_trigger(struct journal_head *jh,
  983 + struct jbd2_buffer_trigger_type *triggers)
  984 +{
  985 + if (!triggers || !triggers->t_abort) /* no trigger set, or the set has no abort hook */
  986 + return;
  987 +
  988 + triggers->t_abort(triggers, jh2bh(jh)); /* the hook gets the set itself and the buffer_head */
  989 +}
  990 +
  991 +
945 992  
946 993 /**
947 994 * int jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata
include/linux/jbd2.h
... ... @@ -1008,6 +1008,35 @@
1008 1008 int __jbd2_journal_remove_checkpoint(struct journal_head *);
1009 1009 void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *);
1010 1010  
  1011 +
  1012 +/*
  1013 + * Triggers: optional per-buffer callbacks, attached with jbd2_journal_set_triggers()
  1014 + */
  1015 +
  1016 +struct jbd2_buffer_trigger_type {
  1017 + /*
  1018 + * Fired just before a buffer is written to the journal. May be NULL.
  1019 + * type is this trigger set, bh the buffer being committed, mapped_data
  1020 + * the mapped data for commit (the frozen copy if one exists, otherwise the buffer's own data) and size is bh->b_size.
  1021 + */
  1022 + void (*t_commit)(struct jbd2_buffer_trigger_type *type,
  1023 + struct buffer_head *bh, void *mapped_data,
  1024 + size_t size);
  1025 +
  1026 + /*
  1027 + * Fired during journal abort for dirty buffers that will not be
  1028 + * committed. May be NULL. type is this trigger set, bh the buffer being dropped.
  1029 + */
  1030 + void (*t_abort)(struct jbd2_buffer_trigger_type *type,
  1031 + struct buffer_head *bh);
  1032 +};
  1033 +
  1034 +extern void jbd2_buffer_commit_trigger(struct journal_head *jh,
  1035 + void *mapped_data,
  1036 + struct jbd2_buffer_trigger_type *triggers);
  1037 +extern void jbd2_buffer_abort_trigger(struct journal_head *jh,
  1038 + struct jbd2_buffer_trigger_type *triggers);
  1039 +
1011 1040 /* Buffer IO */
1012 1041 extern int
1013 1042 jbd2_journal_write_metadata_buffer(transaction_t *transaction,
... ... @@ -1046,6 +1075,8 @@
1046 1075 extern int jbd2_journal_get_write_access(handle_t *, struct buffer_head *);
1047 1076 extern int jbd2_journal_get_create_access (handle_t *, struct buffer_head *);
1048 1077 extern int jbd2_journal_get_undo_access(handle_t *, struct buffer_head *);
  1078 +void jbd2_journal_set_triggers(struct buffer_head *,
  1079 + struct jbd2_buffer_trigger_type *type);
1049 1080 extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *);
1050 1081 extern void jbd2_journal_release_buffer (handle_t *, struct buffer_head *);
1051 1082 extern int jbd2_journal_forget (handle_t *, struct buffer_head *);
include/linux/journal-head.h
... ... @@ -12,6 +12,8 @@
12 12  
13 13 typedef unsigned int tid_t; /* Unique transaction ID */
14 14 typedef struct transaction_s transaction_t; /* Compound transaction type */
  15 +
  16 +
15 17 struct buffer_head;
16 18  
17 19 struct journal_head {
... ... @@ -87,6 +89,12 @@
87 89 * [j_list_lock]
88 90 */
89 91 struct journal_head *b_cpnext, *b_cpprev;
  92 +
  93 + /* Trigger type */
  94 + struct jbd2_buffer_trigger_type *b_triggers;
  95 +
  96 + /* Trigger type for the committing transaction's frozen data */
  97 + struct jbd2_buffer_trigger_type *b_frozen_triggers;
90 98 };
91 99  
92 100 #endif /* JOURNAL_HEAD_H_INCLUDED */