Commit e0e851cf30f1a9bd2e2a7624e9810378d6a2b072

Authored by Chris Mason
Committed by Linus Torvalds
1 parent fc5cd582e9

[PATCH] reiserfs: reiserfs hang and performance fix for data=journal mode

In data=journal mode, reiserfs writepage needs to make sure not to trigger
transactions while being run under PF_MEMALLOC.  This patch makes sure to
redirty the page instead of forcing a transaction start in this case.
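
For reference, the writepage half of the fix reduces to the following pattern (a minimal sketch, not the verbatim hunk; the check lands in reiserfs_write_full_page, and "checked" stands for the page being tagged for data=journal logging):

	/* The dirty bit is cleared before writepage runs, so if we cannot
	 * safely start a transaction here (we were called from the memory
	 * allocator under PF_MEMALLOC), tell the VM to keep the page dirty
	 * and back off.
	 */
	if (checked && (current->flags & PF_MEMALLOC)) {
		redirty_page_for_writepage(wbc, page);
		unlock_page(page);
		return 0;
	}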

Also, calling filemap_fdata* in order to trigger io on the block device can
cause lock inversions on the page lock.  Instead, do simple batching from
flush_commit_list.
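
The batching itself is roughly the loop below (a simplified sketch of the flush_commit_list hunk; declarations and the j_async_throttle accounting are omitted):

	/* Instead of filemap_fdatawrite() on the block device, which can
	 * invert lock ordering on the page lock, submit the dirty log
	 * buffers directly, writing at least 256 blocks per batch.
	 */
	write_len = jl->j_len + 1;
	if (write_len < 256)
		write_len = 256;
	for (i = 0; i < write_len; i++) {
		bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) +
		    (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s);
		tbh = journal_find_get_block(s, bn);
		if (tbh) {
			if (buffer_dirty(tbh))
				ll_rw_block(WRITE, 1, &tbh);	/* submit, don't wait */
			put_bh(tbh);
		}
	}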

Signed-off-by: Chris Mason <mason@suse.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Showing 2 changed files with 21 additions and 5 deletions. The context below is from fs/reiserfs/inode.c.

/*
 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
 */

#include <linux/config.h>
#include <linux/time.h>
#include <linux/fs.h>
#include <linux/reiserfs_fs.h>
#include <linux/reiserfs_acl.h>
#include <linux/reiserfs_xattr.h>
#include <linux/smp_lock.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <asm/uaccess.h>
#include <asm/unaligned.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/quotaops.h>

extern int reiserfs_default_io_size;	/* default io size defined in super.c */

static int reiserfs_commit_write(struct file *f, struct page *page,
				 unsigned from, unsigned to);
static int reiserfs_prepare_write(struct file *f, struct page *page,
				  unsigned from, unsigned to);

void reiserfs_delete_inode(struct inode *inode)
{
	/* We need blocks for transaction + (user+group) quota update (possibly delete) */
	int jbegin_count =
	    JOURNAL_PER_BALANCE_CNT * 2 +
	    2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb);
	struct reiserfs_transaction_handle th;
	int err;

	truncate_inode_pages(&inode->i_data, 0);

	reiserfs_write_lock(inode->i_sb);

	/* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */
	if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) {	/* also handles bad_inode case */
		mutex_lock(&inode->i_mutex);

		reiserfs_delete_xattrs(inode);

		if (journal_begin(&th, inode->i_sb, jbegin_count)) {
			mutex_unlock(&inode->i_mutex);
			goto out;
		}
		reiserfs_update_inode_transaction(inode);

		err = reiserfs_delete_object(&th, inode);

		/* Do quota update inside a transaction for journaled quotas. We must do that
		 * after delete_object so that quota updates go into the same transaction as
		 * stat data deletion */
		if (!err)
			DQUOT_FREE_INODE(inode);

		if (journal_end(&th, inode->i_sb, jbegin_count)) {
			mutex_unlock(&inode->i_mutex);
			goto out;
		}

		mutex_unlock(&inode->i_mutex);

		/* check return value from reiserfs_delete_object after
		 * ending the transaction
		 */
		if (err)
			goto out;

		/* all items of file are deleted, so we can remove "save" link */
		remove_save_link(inode, 0 /* not truncate */ );	/* we can't do anything
								 * about an error here */
	} else {
		/* no object items are in the tree */
		;
	}
      out:
	clear_inode(inode);	/* note this must go after the journal_end to prevent deadlock */
	inode->i_blocks = 0;
	reiserfs_write_unlock(inode->i_sb);
}

static void _make_cpu_key(struct cpu_key *key, int version, __u32 dirid,
			  __u32 objectid, loff_t offset, int type, int length)
{
	key->version = version;

	key->on_disk_key.k_dir_id = dirid;
	key->on_disk_key.k_objectid = objectid;
	set_cpu_key_k_offset(key, offset);
	set_cpu_key_k_type(key, type);
	key->key_length = length;
}

/* take the base of the key (dirid, objectid) and the version from the
   inode (they always come from the inode), and set the offset and type
   of the key */
void make_cpu_key(struct cpu_key *key, struct inode *inode, loff_t offset,
		  int type, int length)
{
	_make_cpu_key(key, get_inode_item_key_version(inode),
		      le32_to_cpu(INODE_PKEY(inode)->k_dir_id),
		      le32_to_cpu(INODE_PKEY(inode)->k_objectid), offset, type,
		      length);
}

//
// when key is 0, do not set version and short key
//
inline void make_le_item_head(struct item_head *ih, const struct cpu_key *key,
			      int version,
			      loff_t offset, int type, int length,
			      int entry_count /*or ih_free_space */ )
{
	if (key) {
		ih->ih_key.k_dir_id = cpu_to_le32(key->on_disk_key.k_dir_id);
		ih->ih_key.k_objectid =
		    cpu_to_le32(key->on_disk_key.k_objectid);
	}
	put_ih_version(ih, version);
	set_le_ih_k_offset(ih, offset);
	set_le_ih_k_type(ih, type);
	put_ih_item_len(ih, length);
	/* set_ih_free_space (ih, 0); */
	// for directory items it is entry count, for directs and stat
	// datas - 0xffff, for indirects - 0
	put_ih_entry_count(ih, entry_count);
}

//
// FIXME: we might cache recently accessed indirect item

// Ugh.  Not too eager for that....
// I cut the code until such time as I see a convincing argument (benchmark).
// I don't want a bloated inode struct..., and I don't like code complexity....

/* cutting the code is fine, since it really isn't in use yet and is easy
** to add back in.  But, Vladimir has a really good idea here.  Think
** about what happens for reading a file.  For each page, the VFS layer
** calls reiserfs_readpage, which searches the tree to find an indirect
** item.  This indirect item has X number of pointers, where X is a big
** number if we've done the block allocation right.  But, we only use
** one or two of these pointers during each call to readpage, needlessly
** researching again later on.
**
** The size of the cache could be dynamic based on the size of the file.
**
** I'd also like to see us cache the location of the stat data item, since
** we are needlessly researching for that frequently.
**
** --chris
*/

/* If this page has a file tail in it, and
** it was read in by get_block_create_0, the page data is valid,
** but the tail is still sitting in a direct item, and we can't write to
** it.  So, look through this page, and check all the mapped buffers
** to make sure they have valid block numbers.  Any that don't have
** valid block numbers need to be unmapped, so that block_prepare_write
** will correctly call reiserfs_get_block to convert the tail into an
** unformatted node
*/
static inline void fix_tail_page_for_writing(struct page *page)
{
	struct buffer_head *head, *next, *bh;

	if (page && page_has_buffers(page)) {
		head = page_buffers(page);
		bh = head;
		do {
			next = bh->b_this_page;
			if (buffer_mapped(bh) && bh->b_blocknr == 0) {
				reiserfs_unmap_buffer(bh);
			}
			bh = next;
		} while (bh != head);
	}
}

/* reiserfs_get_block does not need to allocate a block only if it has
   already been done, or if a non-hole position has been found in the
   indirect item */
static inline int allocation_needed(int retval, b_blocknr_t allocated,
				    struct item_head *ih,
				    __le32 * item, int pos_in_item)
{
	if (allocated)
		return 0;
	if (retval == POSITION_FOUND && is_indirect_le_ih(ih) &&
	    get_block_num(item, pos_in_item))
		return 0;
	return 1;
}

static inline int indirect_item_found(int retval, struct item_head *ih)
{
	return (retval == POSITION_FOUND) && is_indirect_le_ih(ih);
}

static inline void set_block_dev_mapped(struct buffer_head *bh,
					b_blocknr_t block, struct inode *inode)
{
	map_bh(bh, inode->i_sb, block);
}

//
// files which were created in the earlier version cannot be larger
// than 2 GB
//
static int file_capable(struct inode *inode, long block)
{
	if (get_inode_item_key_version(inode) != KEY_FORMAT_3_5 ||	// it is a new file.
	    block < (1 << (31 - inode->i_sb->s_blocksize_bits)))	// old file, but 'block' is inside the 2 GB limit
		return 1;

	return 0;
}

/*static*/ int restart_transaction(struct reiserfs_transaction_handle *th,
				   struct inode *inode, struct path *path)
{
	struct super_block *s = th->t_super;
	int len = th->t_blocks_allocated;
	int err;

	BUG_ON(!th->t_trans_id);
	BUG_ON(!th->t_refcount);

	/* we cannot restart while nested */
	if (th->t_refcount > 1) {
		return 0;
	}
	pathrelse(path);
	reiserfs_update_sd(th, inode);
	err = journal_end(th, s, len);
	if (!err) {
		err = journal_begin(th, s, JOURNAL_PER_BALANCE_CNT * 6);
		if (!err)
			reiserfs_update_inode_transaction(inode);
	}
	return err;
}

// Called by reiserfs_get_block when create == 0.  Returns the block
// number for the 'block'-th logical block of the file.  When it hits
// a direct item it either returns 0 (when called from bmap) or reads
// the direct item into a piece of the page (bh_result).

static int _get_block_create_0(struct inode *inode, long block,
			       struct buffer_head *bh_result, int args)
{
	INITIALIZE_PATH(path);
	struct cpu_key key;
	struct buffer_head *bh;
	struct item_head *ih, tmp_ih;
	int fs_gen;
	int blocknr;
	char *p = NULL;
	int chars;
	int ret;
	int result;
	int done = 0;
	unsigned long offset;

	// prepare the key to look for the 'block'-th block of file
	make_cpu_key(&key, inode,
		     (loff_t) block * inode->i_sb->s_blocksize + 1, TYPE_ANY,
		     3);

      research:
	result = search_for_position_by_key(inode->i_sb, &key, &path);
	if (result != POSITION_FOUND) {
		pathrelse(&path);
		if (p)
			kunmap(bh_result->b_page);
		if (result == IO_ERROR)
			return -EIO;
		// We do not return -ENOENT if there is a hole but the page is
		// uptodate, because it means that there is some mmapped data
		// associated with it that has yet to be written to disk.
		if ((args & GET_BLOCK_NO_HOLE)
		    && !PageUptodate(bh_result->b_page)) {
			return -ENOENT;
		}
		return 0;
	}
	//
	bh = get_last_bh(&path);
	ih = get_ih(&path);
	if (is_indirect_le_ih(ih)) {
		__le32 *ind_item = (__le32 *) B_I_PITEM(bh, ih);

		/* FIXME: here we could cache the indirect item or part of it
		   in the inode to avoid search_by_key in case of subsequent
		   access to the file */
		blocknr = get_block_num(ind_item, path.pos_in_item);
		ret = 0;
		if (blocknr) {
			map_bh(bh_result, inode->i_sb, blocknr);
			if (path.pos_in_item ==
			    ((ih_item_len(ih) / UNFM_P_SIZE) - 1)) {
				set_buffer_boundary(bh_result);
			}
		} else
			// We do not return -ENOENT if there is a hole but the
			// page is uptodate, because it means that there is some
			// mmapped data associated with it that has yet to be
			// written to disk.
			if ((args & GET_BLOCK_NO_HOLE)
			    && !PageUptodate(bh_result->b_page)) {
				ret = -ENOENT;
			}

		pathrelse(&path);
		if (p)
			kunmap(bh_result->b_page);
		return ret;
	}
	// requested data are in direct item(s)
	if (!(args & GET_BLOCK_READ_DIRECT)) {
		// we are called by bmap. FIXME: we cannot map a block of the
		// file when it is stored in direct item(s)
		pathrelse(&path);
		if (p)
			kunmap(bh_result->b_page);
		return -ENOENT;
	}

	/* if we've got a direct item, and the buffer or page was uptodate,
	** we don't want to pull data off disk again.  skip to the
	** end, where we map the buffer and return
	*/
	if (buffer_uptodate(bh_result)) {
		goto finished;
	} else
		/*
		** grab_tail_page can trigger calls to reiserfs_get_block on
		** up to date pages without any buffers.  If the page is up
		** to date, we don't want to read old data off disk.  Set the
		** up to date bit on the buffer instead and jump to the end
		*/
	if (!bh_result->b_page || PageUptodate(bh_result->b_page)) {
		set_buffer_uptodate(bh_result);
		goto finished;
	}
	// read file tail into part of page
	offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1);
	fs_gen = get_generation(inode->i_sb);
	copy_item_head(&tmp_ih, ih);

	/* we only want to kmap if we are reading the tail into the page.
	** this is not the common case, so we don't kmap until we are
	** sure we need to.  But, this means the item might move if
	** kmap schedules
	*/
	if (!p) {
		p = (char *)kmap(bh_result->b_page);
		if (fs_changed(fs_gen, inode->i_sb)
		    && item_moved(&tmp_ih, &path)) {
			goto research;
		}
	}
	p += offset;
	memset(p, 0, inode->i_sb->s_blocksize);
	do {
		if (!is_direct_le_ih(ih)) {
			BUG();
		}
		/* make sure we don't read more bytes than actually exist in
		** the file.  This can happen in odd cases where i_size isn't
		** correct, and when direct item padding results in a few
		** extra bytes at the end of the direct item
		*/
		if ((le_ih_k_offset(ih) + path.pos_in_item) > inode->i_size)
			break;
		if ((le_ih_k_offset(ih) - 1 + ih_item_len(ih)) > inode->i_size) {
			chars =
			    inode->i_size - (le_ih_k_offset(ih) - 1) -
			    path.pos_in_item;
			done = 1;
		} else {
			chars = ih_item_len(ih) - path.pos_in_item;
		}
		memcpy(p, B_I_PITEM(bh, ih) + path.pos_in_item, chars);

		if (done)
			break;

		p += chars;

		if (PATH_LAST_POSITION(&path) != (B_NR_ITEMS(bh) - 1))
			// we are done if the direct item we just read is not
			// the last item of the node.  FIXME: we could try to
			// check the right delimiting key to see whether the
			// direct item continues in the right neighbor, or
			// rely on i_size
			break;

		// update key to look for the next piece
		set_cpu_key_k_offset(&key, cpu_key_k_offset(&key) + chars);
		result = search_for_position_by_key(inode->i_sb, &key, &path);
		if (result != POSITION_FOUND)
			// i/o error most likely
			break;
		bh = get_last_bh(&path);
		ih = get_ih(&path);
	} while (1);

	flush_dcache_page(bh_result->b_page);
	kunmap(bh_result->b_page);

      finished:
	pathrelse(&path);

	if (result == IO_ERROR)
		return -EIO;

	/* this buffer has valid data, but isn't valid for io.  mapping it to
	 * block #0 tells the rest of reiserfs it just has a tail in it
	 */
	map_bh(bh_result, inode->i_sb, 0);
	set_buffer_uptodate(bh_result);
	return 0;
}

// this is called to create the file map, so _get_block_create_0 will
// not read a direct item
static int reiserfs_bmap(struct inode *inode, sector_t block,
			 struct buffer_head *bh_result, int create)
{
	if (!file_capable(inode, block))
		return -EFBIG;

	reiserfs_write_lock(inode->i_sb);
	/* do not read the direct item */
	_get_block_create_0(inode, block, bh_result, 0);
	reiserfs_write_unlock(inode->i_sb);
	return 0;
}

/* special version of get_block that is only used by grab_tail_page right
** now.  It is sent to block_prepare_write, and when you try to get a
** block past the end of the file (or a block from a hole) it returns
** -ENOENT instead of a valid buffer.  block_prepare_write expects to
** be able to do i/o on the buffers returned, unless an error value
** is also returned.
**
** So, this allows block_prepare_write to be used for reading a single block
** in a page, where it does not produce a valid page for holes, or past the
** end of the file.  This turns out to be exactly what we need for reading
** tails for conversion.
**
** The point of the wrapper is forcing a certain value for create, even
** though the VFS layer is calling this function with create==1.  If you
** don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block,
** don't use this function.
*/
static int reiserfs_get_block_create_0(struct inode *inode, sector_t block,
				       struct buffer_head *bh_result,
				       int create)
{
	return reiserfs_get_block(inode, block, bh_result, GET_BLOCK_NO_HOLE);
}

/* This is a special helper for reiserfs_get_block in case we are
   executing a direct_IO request. */
static int reiserfs_get_blocks_direct_io(struct inode *inode,
					 sector_t iblock,
					 unsigned long max_blocks,
					 struct buffer_head *bh_result,
					 int create)
{
	int ret;

	bh_result->b_page = NULL;

	/* We set the b_size before the reiserfs_get_block call since it is
	   referenced in convert_tail_for_hole() that may be called from
	   reiserfs_get_block() */
	bh_result->b_size = (1 << inode->i_blkbits);

	ret = reiserfs_get_block(inode, iblock, bh_result,
				 create | GET_BLOCK_NO_DANGLE);
	if (ret)
		goto out;

	/* don't allow direct io onto tail pages */
	if (buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
		/* make sure future calls to the direct io funcs for this
		** offset in the file fail by unmapping the buffer
		*/
		clear_buffer_mapped(bh_result);
		ret = -EINVAL;
	}
	/* Possible unpacked tail.  Flush the data before pages have
	   disappeared */
	if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) {
		int err;
		lock_kernel();
		err = reiserfs_commit_for_inode(inode);
		REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
		unlock_kernel();
		if (err < 0)
			ret = err;
	}
      out:
	return ret;
}

/*
** helper function for when reiserfs_get_block is called for a hole
** but the file tail is still in a direct item
** bh_result is the buffer head for the hole
** tail_offset is the offset of the start of the tail in the file
**
** This calls prepare_write, which will start a new transaction.
** You should not be in a transaction, or have any paths held, when
** you call this.
*/
static int convert_tail_for_hole(struct inode *inode,
				 struct buffer_head *bh_result,
				 loff_t tail_offset)
{
	unsigned long index;
	unsigned long tail_end;
	unsigned long tail_start;
	struct page *tail_page;
	struct page *hole_page = bh_result->b_page;
	int retval = 0;

	if ((tail_offset & (bh_result->b_size - 1)) != 1)
		return -EIO;

	/* always try to read until the end of the block */
	tail_start = tail_offset & (PAGE_CACHE_SIZE - 1);
	tail_end = (tail_start | (bh_result->b_size - 1)) + 1;

	index = tail_offset >> PAGE_CACHE_SHIFT;
	/* hole_page can be zero in case of direct_io; we are sure
	   that we cannot get here if we write with O_DIRECT into
	   the tail page */
	if (!hole_page || index != hole_page->index) {
		tail_page = grab_cache_page(inode->i_mapping, index);
		retval = -ENOMEM;
		if (!tail_page) {
			goto out;
		}
	} else {
		tail_page = hole_page;
	}

	/* we don't have to make sure the conversion did not happen while
	** we were locking the page because anyone that could convert
	** must first take i_mutex.
	**
	** We must fix the tail page for writing because it might have buffers
	** that are mapped, but have a block number of 0.  This indicates tail
	** data that has been read directly into the page, and
	** block_prepare_write won't trigger a get_block in this case.
	*/
	fix_tail_page_for_writing(tail_page);
	retval = reiserfs_prepare_write(NULL, tail_page, tail_start, tail_end);
	if (retval)
		goto unlock;

	/* tail conversion might change the data in the page */
	flush_dcache_page(tail_page);

	retval = reiserfs_commit_write(NULL, tail_page, tail_start, tail_end);

      unlock:
	if (tail_page != hole_page) {
		unlock_page(tail_page);
		page_cache_release(tail_page);
	}
      out:
	return retval;
}

static inline int _allocate_block(struct reiserfs_transaction_handle *th,
				  long block,
				  struct inode *inode,
				  b_blocknr_t * allocated_block_nr,
				  struct path *path, int flags)
{
	BUG_ON(!th->t_trans_id);

#ifdef REISERFS_PREALLOCATE
	if (!(flags & GET_BLOCK_NO_IMUX)) {
		return reiserfs_new_unf_blocknrs2(th, inode, allocated_block_nr,
						  path, block);
	}
#endif
	return reiserfs_new_unf_blocknrs(th, inode, allocated_block_nr, path,
					 block);
}

598 int reiserfs_get_block(struct inode *inode, sector_t block, 598 int reiserfs_get_block(struct inode *inode, sector_t block,
599 struct buffer_head *bh_result, int create) 599 struct buffer_head *bh_result, int create)
600 { 600 {
601 int repeat, retval = 0; 601 int repeat, retval = 0;
602 b_blocknr_t allocated_block_nr = 0; // b_blocknr_t is (unsigned) 32 bit int 602 b_blocknr_t allocated_block_nr = 0; // b_blocknr_t is (unsigned) 32 bit int
603 INITIALIZE_PATH(path); 603 INITIALIZE_PATH(path);
604 int pos_in_item; 604 int pos_in_item;
605 struct cpu_key key; 605 struct cpu_key key;
606 struct buffer_head *bh, *unbh = NULL; 606 struct buffer_head *bh, *unbh = NULL;
607 struct item_head *ih, tmp_ih; 607 struct item_head *ih, tmp_ih;
608 __le32 *item; 608 __le32 *item;
609 int done; 609 int done;
610 int fs_gen; 610 int fs_gen;
611 struct reiserfs_transaction_handle *th = NULL; 611 struct reiserfs_transaction_handle *th = NULL;
612 /* space reserved in transaction batch: 612 /* space reserved in transaction batch:
613 . 3 balancings in direct->indirect conversion 613 . 3 balancings in direct->indirect conversion
614 . 1 block involved into reiserfs_update_sd() 614 . 1 block involved into reiserfs_update_sd()
615 XXX in practically impossible worst case direct2indirect() 615 XXX in practically impossible worst case direct2indirect()
616 can incur (much) more than 3 balancings. 616 can incur (much) more than 3 balancings.
617 quota update for user, group */ 617 quota update for user, group */
618 int jbegin_count = 618 int jbegin_count =
619 JOURNAL_PER_BALANCE_CNT * 3 + 1 + 619 JOURNAL_PER_BALANCE_CNT * 3 + 1 +
620 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb); 620 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb);
621 int version; 621 int version;
622 int dangle = 1; 622 int dangle = 1;
623 loff_t new_offset = 623 loff_t new_offset =
624 (((loff_t) block) << inode->i_sb->s_blocksize_bits) + 1; 624 (((loff_t) block) << inode->i_sb->s_blocksize_bits) + 1;
625 625
626 /* bad.... */ 626 /* bad.... */
627 reiserfs_write_lock(inode->i_sb); 627 reiserfs_write_lock(inode->i_sb);
628 version = get_inode_item_key_version(inode); 628 version = get_inode_item_key_version(inode);
629 629
630 if (block < 0) { 630 if (block < 0) {
631 reiserfs_write_unlock(inode->i_sb); 631 reiserfs_write_unlock(inode->i_sb);
632 return -EIO; 632 return -EIO;
633 } 633 }
634 634
635 if (!file_capable(inode, block)) { 635 if (!file_capable(inode, block)) {
636 reiserfs_write_unlock(inode->i_sb); 636 reiserfs_write_unlock(inode->i_sb);
637 return -EFBIG; 637 return -EFBIG;
638 } 638 }
639 639
640 /* if !create, we aren't changing the FS, so we don't need to 640 /* if !create, we aren't changing the FS, so we don't need to
641 ** log anything, so we don't need to start a transaction 641 ** log anything, so we don't need to start a transaction
642 */ 642 */
643 if (!(create & GET_BLOCK_CREATE)) { 643 if (!(create & GET_BLOCK_CREATE)) {
644 int ret; 644 int ret;
645 /* find number of block-th logical block of the file */ 645 /* find number of block-th logical block of the file */
646 ret = _get_block_create_0(inode, block, bh_result, 646 ret = _get_block_create_0(inode, block, bh_result,
647 create | GET_BLOCK_READ_DIRECT); 647 create | GET_BLOCK_READ_DIRECT);
648 reiserfs_write_unlock(inode->i_sb); 648 reiserfs_write_unlock(inode->i_sb);
649 return ret; 649 return ret;
650 } 650 }
651 /* 651 /*
652 * if we're already in a transaction, make sure to close 652 * if we're already in a transaction, make sure to close
653 * any new transactions we start in this func 653 * any new transactions we start in this func
654 */ 654 */
655 if ((create & GET_BLOCK_NO_DANGLE) || 655 if ((create & GET_BLOCK_NO_DANGLE) ||
656 reiserfs_transaction_running(inode->i_sb)) 656 reiserfs_transaction_running(inode->i_sb))
657 dangle = 0; 657 dangle = 0;
658 658
659 /* If file is of such a size, that it might have a tail and tails are enabled 659 /* If file is of such a size, that it might have a tail and tails are enabled
660 ** we should mark it as possibly needing tail packing on close 660 ** we should mark it as possibly needing tail packing on close
661 */ 661 */
662 if ((have_large_tails(inode->i_sb) 662 if ((have_large_tails(inode->i_sb)
663 && inode->i_size < i_block_size(inode) * 4) 663 && inode->i_size < i_block_size(inode) * 4)
664 || (have_small_tails(inode->i_sb) 664 || (have_small_tails(inode->i_sb)
665 && inode->i_size < i_block_size(inode))) 665 && inode->i_size < i_block_size(inode)))
666 REISERFS_I(inode)->i_flags |= i_pack_on_close_mask; 666 REISERFS_I(inode)->i_flags |= i_pack_on_close_mask;
667 667
668 /* set the key of the first byte in the 'block'-th block of file */ 668 /* set the key of the first byte in the 'block'-th block of file */
669 make_cpu_key(&key, inode, new_offset, TYPE_ANY, 3 /*key length */ ); 669 make_cpu_key(&key, inode, new_offset, TYPE_ANY, 3 /*key length */ );
670 if ((new_offset + inode->i_sb->s_blocksize - 1) > inode->i_size) { 670 if ((new_offset + inode->i_sb->s_blocksize - 1) > inode->i_size) {
671 start_trans: 671 start_trans:
672 th = reiserfs_persistent_transaction(inode->i_sb, jbegin_count); 672 th = reiserfs_persistent_transaction(inode->i_sb, jbegin_count);
673 if (!th) { 673 if (!th) {
674 retval = -ENOMEM; 674 retval = -ENOMEM;
675 goto failure; 675 goto failure;
676 } 676 }
677 reiserfs_update_inode_transaction(inode); 677 reiserfs_update_inode_transaction(inode);
678 } 678 }
679 research: 679 research:
680 680
681 retval = search_for_position_by_key(inode->i_sb, &key, &path); 681 retval = search_for_position_by_key(inode->i_sb, &key, &path);
682 if (retval == IO_ERROR) { 682 if (retval == IO_ERROR) {
683 retval = -EIO; 683 retval = -EIO;
684 goto failure; 684 goto failure;
685 } 685 }
686 686
687 bh = get_last_bh(&path); 687 bh = get_last_bh(&path);
688 ih = get_ih(&path); 688 ih = get_ih(&path);
689 item = get_item(&path); 689 item = get_item(&path);
690 pos_in_item = path.pos_in_item; 690 pos_in_item = path.pos_in_item;
691 691
692 fs_gen = get_generation(inode->i_sb); 692 fs_gen = get_generation(inode->i_sb);
693 copy_item_head(&tmp_ih, ih); 693 copy_item_head(&tmp_ih, ih);
694 694
695 if (allocation_needed 695 if (allocation_needed
696 (retval, allocated_block_nr, ih, item, pos_in_item)) { 696 (retval, allocated_block_nr, ih, item, pos_in_item)) {
697 /* we have to allocate block for the unformatted node */ 697 /* we have to allocate block for the unformatted node */
698 if (!th) { 698 if (!th) {
699 pathrelse(&path); 699 pathrelse(&path);
700 goto start_trans; 700 goto start_trans;
701 } 701 }
702 702
703 repeat = 703 repeat =
704 _allocate_block(th, block, inode, &allocated_block_nr, 704 _allocate_block(th, block, inode, &allocated_block_nr,
705 &path, create); 705 &path, create);
706 706
707 if (repeat == NO_DISK_SPACE || repeat == QUOTA_EXCEEDED) { 707 if (repeat == NO_DISK_SPACE || repeat == QUOTA_EXCEEDED) {
708 /* restart the transaction to give the journal a chance to free 708 /* restart the transaction to give the journal a chance to free
709 ** some blocks. releases the path, so we have to go back to 709 ** some blocks. releases the path, so we have to go back to
710 ** research if we succeed on the second try 710 ** research if we succeed on the second try
711 */ 711 */
712 SB_JOURNAL(inode->i_sb)->j_next_async_flush = 1; 712 SB_JOURNAL(inode->i_sb)->j_next_async_flush = 1;
713 retval = restart_transaction(th, inode, &path); 713 retval = restart_transaction(th, inode, &path);
714 if (retval) 714 if (retval)
715 goto failure; 715 goto failure;
716 repeat = 716 repeat =
717 _allocate_block(th, block, inode, 717 _allocate_block(th, block, inode,
718 &allocated_block_nr, NULL, create); 718 &allocated_block_nr, NULL, create);
719 719
720 if (repeat != NO_DISK_SPACE && repeat != QUOTA_EXCEEDED) { 720 if (repeat != NO_DISK_SPACE && repeat != QUOTA_EXCEEDED) {
721 goto research; 721 goto research;
722 } 722 }
723 if (repeat == QUOTA_EXCEEDED) 723 if (repeat == QUOTA_EXCEEDED)
724 retval = -EDQUOT; 724 retval = -EDQUOT;
725 else 725 else
726 retval = -ENOSPC; 726 retval = -ENOSPC;
727 goto failure; 727 goto failure;
728 } 728 }
729 729
730 if (fs_changed(fs_gen, inode->i_sb) 730 if (fs_changed(fs_gen, inode->i_sb)
731 && item_moved(&tmp_ih, &path)) { 731 && item_moved(&tmp_ih, &path)) {
732 goto research; 732 goto research;
733 } 733 }
734 } 734 }
735 735
736 if (indirect_item_found(retval, ih)) { 736 if (indirect_item_found(retval, ih)) {
737 b_blocknr_t unfm_ptr; 737 b_blocknr_t unfm_ptr;
738 /* 'block'-th block is in the file already (there is 738 /* 'block'-th block is in the file already (there is
739 corresponding cell in some indirect item). But it may be 739 corresponding cell in some indirect item). But it may be
740 zero unformatted node pointer (hole) */ 740 zero unformatted node pointer (hole) */
741 unfm_ptr = get_block_num(item, pos_in_item); 741 unfm_ptr = get_block_num(item, pos_in_item);
742 if (unfm_ptr == 0) { 742 if (unfm_ptr == 0) {
743 /* use allocated block to plug the hole */ 743 /* use allocated block to plug the hole */
744 reiserfs_prepare_for_journal(inode->i_sb, bh, 1); 744 reiserfs_prepare_for_journal(inode->i_sb, bh, 1);
745 if (fs_changed(fs_gen, inode->i_sb) 745 if (fs_changed(fs_gen, inode->i_sb)
746 && item_moved(&tmp_ih, &path)) { 746 && item_moved(&tmp_ih, &path)) {
747 reiserfs_restore_prepared_buffer(inode->i_sb, 747 reiserfs_restore_prepared_buffer(inode->i_sb,
748 bh); 748 bh);
749 goto research; 749 goto research;
750 } 750 }
751 set_buffer_new(bh_result); 751 set_buffer_new(bh_result);
752 if (buffer_dirty(bh_result) 752 if (buffer_dirty(bh_result)
753 && reiserfs_data_ordered(inode->i_sb)) 753 && reiserfs_data_ordered(inode->i_sb))
754 reiserfs_add_ordered_list(inode, bh_result); 754 reiserfs_add_ordered_list(inode, bh_result);
755 put_block_num(item, pos_in_item, allocated_block_nr); 755 put_block_num(item, pos_in_item, allocated_block_nr);
756 unfm_ptr = allocated_block_nr; 756 unfm_ptr = allocated_block_nr;
757 journal_mark_dirty(th, inode->i_sb, bh); 757 journal_mark_dirty(th, inode->i_sb, bh);
758 reiserfs_update_sd(th, inode); 758 reiserfs_update_sd(th, inode);
759 } 759 }
760 set_block_dev_mapped(bh_result, unfm_ptr, inode); 760 set_block_dev_mapped(bh_result, unfm_ptr, inode);
761 pathrelse(&path); 761 pathrelse(&path);
762 retval = 0; 762 retval = 0;
763 if (!dangle && th) 763 if (!dangle && th)
764 retval = reiserfs_end_persistent_transaction(th); 764 retval = reiserfs_end_persistent_transaction(th);
765 765
766 reiserfs_write_unlock(inode->i_sb); 766 reiserfs_write_unlock(inode->i_sb);
767 767
768 /* the item was found, so new blocks were not added to the file 768 /* the item was found, so new blocks were not added to the file
769 ** there is no need to make sure the inode is updated with this 769 ** there is no need to make sure the inode is updated with this
770 ** transaction 770 ** transaction
771 */ 771 */
772 return retval; 772 return retval;
773 } 773 }
774 774
775 if (!th) { 775 if (!th) {
776 pathrelse(&path); 776 pathrelse(&path);
777 goto start_trans; 777 goto start_trans;
778 } 778 }
779 779
780 /* desired position is not found or is in the direct item. We have 780 /* desired position is not found or is in the direct item. We have
781 to append file with holes up to 'block'-th block converting 781 to append file with holes up to 'block'-th block converting
782 direct items to indirect one if necessary */ 782 direct items to indirect one if necessary */
783 done = 0; 783 done = 0;
784 do { 784 do {
785 if (is_statdata_le_ih(ih)) { 785 if (is_statdata_le_ih(ih)) {
786 __le32 unp = 0; 786 __le32 unp = 0;
787 struct cpu_key tmp_key; 787 struct cpu_key tmp_key;
788 788
789 /* indirect item has to be inserted */ 789 /* indirect item has to be inserted */
790 make_le_item_head(&tmp_ih, &key, version, 1, 790 make_le_item_head(&tmp_ih, &key, version, 1,
791 TYPE_INDIRECT, UNFM_P_SIZE, 791 TYPE_INDIRECT, UNFM_P_SIZE,
792 0 /* free_space */ ); 792 0 /* free_space */ );
793 793
794 if (cpu_key_k_offset(&key) == 1) { 794 if (cpu_key_k_offset(&key) == 1) {
795 /* we are going to add 'block'-th block to the file. Use 795 /* we are going to add 'block'-th block to the file. Use
796 allocated block for that */ 796 allocated block for that */
797 unp = cpu_to_le32(allocated_block_nr); 797 unp = cpu_to_le32(allocated_block_nr);
798 set_block_dev_mapped(bh_result, 798 set_block_dev_mapped(bh_result,
799 allocated_block_nr, inode); 799 allocated_block_nr, inode);
800 set_buffer_new(bh_result); 800 set_buffer_new(bh_result);
801 done = 1; 801 done = 1;
802 } 802 }
803 tmp_key = key; // ;) 803 tmp_key = key; // ;)
804 set_cpu_key_k_offset(&tmp_key, 1); 804 set_cpu_key_k_offset(&tmp_key, 1);
805 PATH_LAST_POSITION(&path)++; 805 PATH_LAST_POSITION(&path)++;
806 806
807 retval = 807 retval =
808 reiserfs_insert_item(th, &path, &tmp_key, &tmp_ih, 808 reiserfs_insert_item(th, &path, &tmp_key, &tmp_ih,
809 inode, (char *)&unp); 809 inode, (char *)&unp);
810 if (retval) { 810 if (retval) {
811 reiserfs_free_block(th, inode, 811 reiserfs_free_block(th, inode,
812 allocated_block_nr, 1); 812 allocated_block_nr, 1);
813 goto failure; // retval == -ENOSPC, -EDQUOT or -EIO or -EEXIST 813 goto failure; // retval == -ENOSPC, -EDQUOT or -EIO or -EEXIST
814 } 814 }
815 //mark_tail_converted (inode); 815 //mark_tail_converted (inode);
816 } else if (is_direct_le_ih(ih)) { 816 } else if (is_direct_le_ih(ih)) {
817 /* direct item has to be converted */ 817 /* direct item has to be converted */
818 loff_t tail_offset; 818 loff_t tail_offset;
819 819
820 tail_offset = 820 tail_offset =
821 ((le_ih_k_offset(ih) - 821 ((le_ih_k_offset(ih) -
822 1) & ~(inode->i_sb->s_blocksize - 1)) + 1; 822 1) & ~(inode->i_sb->s_blocksize - 1)) + 1;
823 if (tail_offset == cpu_key_k_offset(&key)) { 823 if (tail_offset == cpu_key_k_offset(&key)) {
824 /* direct item we just found fits into block we have 824 /* direct item we just found fits into block we have
825 to map. Convert it into unformatted node: use 825 to map. Convert it into unformatted node: use
826 bh_result for the conversion */ 826 bh_result for the conversion */
827 set_block_dev_mapped(bh_result, 827 set_block_dev_mapped(bh_result,
828 allocated_block_nr, inode); 828 allocated_block_nr, inode);
829 unbh = bh_result; 829 unbh = bh_result;
830 done = 1; 830 done = 1;
831 } else { 831 } else {
832 /* we have to padd file tail stored in direct item(s) 832 /* we have to padd file tail stored in direct item(s)
833 up to block size and convert it to unformatted 833 up to block size and convert it to unformatted
834 node. FIXME: this should also get into page cache */ 834 node. FIXME: this should also get into page cache */
835 835
836 pathrelse(&path); 836 pathrelse(&path);
837 /* 837 /*
838 * ugly, but we can only end the transaction if 838 * ugly, but we can only end the transaction if
839 * we aren't nested 839 * we aren't nested
840 */ 840 */
841 BUG_ON(!th->t_refcount); 841 BUG_ON(!th->t_refcount);
842 if (th->t_refcount == 1) { 842 if (th->t_refcount == 1) {
843 retval = 843 retval =
844 reiserfs_end_persistent_transaction 844 reiserfs_end_persistent_transaction
845 (th); 845 (th);
846 th = NULL; 846 th = NULL;
847 if (retval) 847 if (retval)
848 goto failure; 848 goto failure;
849 } 849 }
850 850
851 retval = 851 retval =
852 convert_tail_for_hole(inode, bh_result, 852 convert_tail_for_hole(inode, bh_result,
853 tail_offset); 853 tail_offset);
854 if (retval) { 854 if (retval) {
855 if (retval != -ENOSPC) 855 if (retval != -ENOSPC)
856 reiserfs_warning(inode->i_sb, 856 reiserfs_warning(inode->i_sb,
857 "clm-6004: convert tail failed inode %lu, error %d", 857 "clm-6004: convert tail failed inode %lu, error %d",
858 inode->i_ino, 858 inode->i_ino,
859 retval); 859 retval);
860 if (allocated_block_nr) { 860 if (allocated_block_nr) {
861 /* the bitmap, the super, and the stat data == 3 */ 861 /* the bitmap, the super, and the stat data == 3 */
862 if (!th) 862 if (!th)
863 th = reiserfs_persistent_transaction(inode->i_sb, 3); 863 th = reiserfs_persistent_transaction(inode->i_sb, 3);
864 if (th) 864 if (th)
865 reiserfs_free_block(th, 865 reiserfs_free_block(th,
866 inode, 866 inode,
867 allocated_block_nr, 867 allocated_block_nr,
868 1); 868 1);
869 } 869 }
870 goto failure; 870 goto failure;
871 } 871 }
872 goto research; 872 goto research;
873 } 873 }
874 retval = 874 retval =
875 direct2indirect(th, inode, &path, unbh, 875 direct2indirect(th, inode, &path, unbh,
876 tail_offset); 876 tail_offset);
877 if (retval) { 877 if (retval) {
878 reiserfs_unmap_buffer(unbh); 878 reiserfs_unmap_buffer(unbh);
879 reiserfs_free_block(th, inode, 879 reiserfs_free_block(th, inode,
880 allocated_block_nr, 1); 880 allocated_block_nr, 1);
881 goto failure; 881 goto failure;
882 } 882 }
883 /* it is important the set_buffer_uptodate is done after 883 /* it is important the set_buffer_uptodate is done after
884 ** the direct2indirect. The buffer might contain valid 884 ** the direct2indirect. The buffer might contain valid
885 ** data newer than the data on disk (read by readpage, changed, 885 ** data newer than the data on disk (read by readpage, changed,
886 ** and then sent here by writepage). direct2indirect needs 886 ** and then sent here by writepage). direct2indirect needs
887 ** to know if unbh was already up to date, so it can decide 887 ** to know if unbh was already up to date, so it can decide
888 ** if the data in unbh needs to be replaced with data from 888 ** if the data in unbh needs to be replaced with data from
889 ** the disk 889 ** the disk
890 */ 890 */
891 set_buffer_uptodate(unbh); 891 set_buffer_uptodate(unbh);
892 892
893 /* unbh->b_page == NULL in case of DIRECT_IO request, this means 893 /* unbh->b_page == NULL in case of DIRECT_IO request, this means
894 buffer will disappear shortly, so it should not be added to 894 buffer will disappear shortly, so it should not be added to
895 the tail list */ 895 the tail list */
896 if (unbh->b_page) { 896 if (unbh->b_page) {
897 /* we've converted the tail, so we must 897 /* we've converted the tail, so we must
898 ** flush unbh before the transaction commits 898 ** flush unbh before the transaction commits
899 */ 899 */
900 reiserfs_add_tail_list(inode, unbh); 900 reiserfs_add_tail_list(inode, unbh);
901 901
902 /* mark it dirty now to prevent commit_write from adding 902 /* mark it dirty now to prevent commit_write from adding
903 ** this buffer to the inode's dirty buffer list 903 ** this buffer to the inode's dirty buffer list
904 */ 904 */
905 /* 905 /*
906 * AKPM: changed __mark_buffer_dirty to mark_buffer_dirty(). 906 * AKPM: changed __mark_buffer_dirty to mark_buffer_dirty().
907 * It's still atomic, but it sets the page dirty too, 907 * It's still atomic, but it sets the page dirty too,
908 * which makes it eligible for writeback at any time by the 908 * which makes it eligible for writeback at any time by the
909 * VM (which was also the case with __mark_buffer_dirty()) 909 * VM (which was also the case with __mark_buffer_dirty())
910 */ 910 */
911 mark_buffer_dirty(unbh); 911 mark_buffer_dirty(unbh);
912 } 912 }
913 } else { 913 } else {
914 /* append an indirect item, with holes if needed; when appending 914 /* append an indirect item, with holes if needed; when appending
915 the pointer to the 'block'-th block, use the block that is already 915 the pointer to the 'block'-th block, use the block that is already
916 allocated */ 916 allocated */
917 struct cpu_key tmp_key; 917 struct cpu_key tmp_key;
918 unp_t unf_single = 0; // We use this in case we need to allocate only 918 unp_t unf_single = 0; // We use this in case we need to allocate only
919 // one block, which is the fast path 919 // one block, which is the fast path
920 unp_t *un; 920 unp_t *un;
921 __u64 max_to_insert = 921 __u64 max_to_insert =
922 MAX_ITEM_LEN(inode->i_sb->s_blocksize) / 922 MAX_ITEM_LEN(inode->i_sb->s_blocksize) /
923 UNFM_P_SIZE; 923 UNFM_P_SIZE;
924 __u64 blocks_needed; 924 __u64 blocks_needed;
925 925
926 RFALSE(pos_in_item != ih_item_len(ih) / UNFM_P_SIZE, 926 RFALSE(pos_in_item != ih_item_len(ih) / UNFM_P_SIZE,
927 "vs-804: invalid position for append"); 927 "vs-804: invalid position for append");
928 /* indirect item has to be appended, set up key of that position */ 928 /* indirect item has to be appended, set up key of that position */
929 make_cpu_key(&tmp_key, inode, 929 make_cpu_key(&tmp_key, inode,
930 le_key_k_offset(version, 930 le_key_k_offset(version,
931 &(ih->ih_key)) + 931 &(ih->ih_key)) +
932 op_bytes_number(ih, 932 op_bytes_number(ih,
933 inode->i_sb->s_blocksize), 933 inode->i_sb->s_blocksize),
934 //pos_in_item * inode->i_sb->s_blocksize, 934 //pos_in_item * inode->i_sb->s_blocksize,
935 TYPE_INDIRECT, 3); // key type is unimportant 935 TYPE_INDIRECT, 3); // key type is unimportant
936 936
937 blocks_needed = 937 blocks_needed =
938 1 + 938 1 +
939 ((cpu_key_k_offset(&key) - 939 ((cpu_key_k_offset(&key) -
940 cpu_key_k_offset(&tmp_key)) >> inode->i_sb-> 940 cpu_key_k_offset(&tmp_key)) >> inode->i_sb->
941 s_blocksize_bits); 941 s_blocksize_bits);
942 RFALSE(blocks_needed < 0, "green-805: invalid offset"); 942 RFALSE(blocks_needed < 0, "green-805: invalid offset");
943 943
944 if (blocks_needed == 1) { 944 if (blocks_needed == 1) {
945 un = &unf_single; 945 un = &unf_single;
946 } else { 946 } else {
947 un = kmalloc(min(blocks_needed, max_to_insert) * UNFM_P_SIZE, GFP_ATOMIC); // We need to avoid scheduling. 947 un = kmalloc(min(blocks_needed, max_to_insert) * UNFM_P_SIZE, GFP_ATOMIC); // We need to avoid scheduling.
948 if (!un) { 948 if (!un) {
949 un = &unf_single; 949 un = &unf_single;
950 blocks_needed = 1; 950 blocks_needed = 1;
951 max_to_insert = 0; 951 max_to_insert = 0;
952 } else 952 } else
953 memset(un, 0, 953 memset(un, 0,
954 UNFM_P_SIZE * min(blocks_needed, 954 UNFM_P_SIZE * min(blocks_needed,
955 max_to_insert)); 955 max_to_insert));
956 } 956 }
957 if (blocks_needed <= max_to_insert) { 957 if (blocks_needed <= max_to_insert) {
958 /* we are going to add target block to the file. Use allocated 958 /* we are going to add target block to the file. Use allocated
959 block for that */ 959 block for that */
960 un[blocks_needed - 1] = 960 un[blocks_needed - 1] =
961 cpu_to_le32(allocated_block_nr); 961 cpu_to_le32(allocated_block_nr);
962 set_block_dev_mapped(bh_result, 962 set_block_dev_mapped(bh_result,
963 allocated_block_nr, inode); 963 allocated_block_nr, inode);
964 set_buffer_new(bh_result); 964 set_buffer_new(bh_result);
965 done = 1; 965 done = 1;
966 } else { 966 } else {
967 /* paste hole to the indirect item */ 967 /* paste hole to the indirect item */
968 /* If kmalloc failed, max_to_insert becomes zero and it means we 968 /* If kmalloc failed, max_to_insert becomes zero and it means we
969 only have space for one block */ 969 only have space for one block */
970 blocks_needed = 970 blocks_needed =
971 max_to_insert ? max_to_insert : 1; 971 max_to_insert ? max_to_insert : 1;
972 } 972 }
973 retval = 973 retval =
974 reiserfs_paste_into_item(th, &path, &tmp_key, inode, 974 reiserfs_paste_into_item(th, &path, &tmp_key, inode,
975 (char *)un, 975 (char *)un,
976 UNFM_P_SIZE * 976 UNFM_P_SIZE *
977 blocks_needed); 977 blocks_needed);
978 978
979 if (blocks_needed != 1) 979 if (blocks_needed != 1)
980 kfree(un); 980 kfree(un);
981 981
982 if (retval) { 982 if (retval) {
983 reiserfs_free_block(th, inode, 983 reiserfs_free_block(th, inode,
984 allocated_block_nr, 1); 984 allocated_block_nr, 1);
985 goto failure; 985 goto failure;
986 } 986 }
987 if (!done) { 987 if (!done) {
988 /* We need to mark the new file size in case this function is 988 /* We need to mark the new file size in case this function is
989 interrupted/aborted later on. We may do this only for 989 interrupted/aborted later on. We may do this only for
990 holes. */ 990 holes. */
991 inode->i_size += 991 inode->i_size +=
992 inode->i_sb->s_blocksize * blocks_needed; 992 inode->i_sb->s_blocksize * blocks_needed;
993 } 993 }
994 } 994 }
995 995
996 if (done == 1) 996 if (done == 1)
997 break; 997 break;
998 998
999 /* this loop could log more blocks than we had originally asked 999 /* this loop could log more blocks than we had originally asked
1000 ** for. So, we have to allow the transaction to end if it is 1000 ** for. So, we have to allow the transaction to end if it is
1001 ** too big or too full. Update the inode so things are 1001 ** too big or too full. Update the inode so things are
1002 ** consistent if we crash before the function returns 1002 ** consistent if we crash before the function returns
1003 ** 1003 **
1004 ** release the path so that anybody waiting on the path before 1004 ** release the path so that anybody waiting on the path before
1005 ** ending their transaction will be able to continue. 1005 ** ending their transaction will be able to continue.
1006 */ 1006 */
1007 if (journal_transaction_should_end(th, th->t_blocks_allocated)) { 1007 if (journal_transaction_should_end(th, th->t_blocks_allocated)) {
1008 retval = restart_transaction(th, inode, &path); 1008 retval = restart_transaction(th, inode, &path);
1009 if (retval) 1009 if (retval)
1010 goto failure; 1010 goto failure;
1011 } 1011 }
1012 /* inserting indirect pointers for a hole can take a 1012 /* inserting indirect pointers for a hole can take a
1013 ** long time. reschedule if needed 1013 ** long time. reschedule if needed
1014 */ 1014 */
1015 cond_resched(); 1015 cond_resched();
1016 1016
1017 retval = search_for_position_by_key(inode->i_sb, &key, &path); 1017 retval = search_for_position_by_key(inode->i_sb, &key, &path);
1018 if (retval == IO_ERROR) { 1018 if (retval == IO_ERROR) {
1019 retval = -EIO; 1019 retval = -EIO;
1020 goto failure; 1020 goto failure;
1021 } 1021 }
1022 if (retval == POSITION_FOUND) { 1022 if (retval == POSITION_FOUND) {
1023 reiserfs_warning(inode->i_sb, 1023 reiserfs_warning(inode->i_sb,
1024 "vs-825: reiserfs_get_block: " 1024 "vs-825: reiserfs_get_block: "
1025 "%K should not be found", &key); 1025 "%K should not be found", &key);
1026 retval = -EEXIST; 1026 retval = -EEXIST;
1027 if (allocated_block_nr) 1027 if (allocated_block_nr)
1028 reiserfs_free_block(th, inode, 1028 reiserfs_free_block(th, inode,
1029 allocated_block_nr, 1); 1029 allocated_block_nr, 1);
1030 pathrelse(&path); 1030 pathrelse(&path);
1031 goto failure; 1031 goto failure;
1032 } 1032 }
1033 bh = get_last_bh(&path); 1033 bh = get_last_bh(&path);
1034 ih = get_ih(&path); 1034 ih = get_ih(&path);
1035 item = get_item(&path); 1035 item = get_item(&path);
1036 pos_in_item = path.pos_in_item; 1036 pos_in_item = path.pos_in_item;
1037 } while (1); 1037 } while (1);
1038 1038
1039 retval = 0; 1039 retval = 0;
1040 1040
1041 failure: 1041 failure:
1042 if (th && (!dangle || (retval && !th->t_trans_id))) { 1042 if (th && (!dangle || (retval && !th->t_trans_id))) {
1043 int err; 1043 int err;
1044 if (th->t_trans_id) 1044 if (th->t_trans_id)
1045 reiserfs_update_sd(th, inode); 1045 reiserfs_update_sd(th, inode);
1046 err = reiserfs_end_persistent_transaction(th); 1046 err = reiserfs_end_persistent_transaction(th);
1047 if (err) 1047 if (err)
1048 retval = err; 1048 retval = err;
1049 } 1049 }
1050 1050
1051 reiserfs_write_unlock(inode->i_sb); 1051 reiserfs_write_unlock(inode->i_sb);
1052 reiserfs_check_path(&path); 1052 reiserfs_check_path(&path);
1053 return retval; 1053 return retval;
1054 } 1054 }
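
The comment block near the end of the loop above captures a pattern this file relies on: a long-running tree operation must be willing to end or restart its transaction when the journal fills, release the path so other waiters can proceed, and then re-search because the tree may have shifted underneath it. A minimal sketch of that shape, where do_one_tree_op() is a hypothetical stand-in for one step of the real work (the other helpers are the ones used above; error handling elided):

	retval = 0;
	do {
		/* hypothetical: perform one tree operation, set done
		 * once the block is mapped */
		retval = do_one_tree_op(th, inode, &path, &done);
		if (retval || done)
			break;

		/* the loop may have logged more blocks than were reserved */
		if (journal_transaction_should_end(th, th->t_blocks_allocated)) {
			retval = restart_transaction(th, inode, &path);
			if (retval)
				break;
		}
		cond_resched();	/* hole filling can take a long time */

		retval = search_for_position_by_key(inode->i_sb, &key, &path);
	} while (retval == POSITION_NOT_FOUND);
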
1055 1055
1056 static int 1056 static int
1057 reiserfs_readpages(struct file *file, struct address_space *mapping, 1057 reiserfs_readpages(struct file *file, struct address_space *mapping,
1058 struct list_head *pages, unsigned nr_pages) 1058 struct list_head *pages, unsigned nr_pages)
1059 { 1059 {
1060 return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_block); 1060 return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_block);
1061 } 1061 }
1062 1062
1063 /* Compute the real number of bytes used by a file 1063 /* Compute the real number of bytes used by a file
1064 * The following three functions can go away when we have enough space in the stat item 1064 * The following three functions can go away when we have enough space in the stat item
1065 */ 1065 */
1066 static int real_space_diff(struct inode *inode, int sd_size) 1066 static int real_space_diff(struct inode *inode, int sd_size)
1067 { 1067 {
1068 int bytes; 1068 int bytes;
1069 loff_t blocksize = inode->i_sb->s_blocksize; 1069 loff_t blocksize = inode->i_sb->s_blocksize;
1070 1070
1071 if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) 1071 if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode))
1072 return sd_size; 1072 return sd_size;
1073 1073
1074 /* End of file is also in a full block with an indirect reference, so round 1074 /* End of file is also in a full block with an indirect reference, so round
1075 ** up to the next block. 1075 ** up to the next block.
1076 ** 1076 **
1077 ** there is just no way to know if the tail is actually packed 1077 ** there is just no way to know if the tail is actually packed
1078 ** in the file, so we have to assume it isn't. When we pack the 1078 ** in the file, so we have to assume it isn't. When we pack the
1079 ** tail, we add 4 bytes to pretend there really is an unformatted 1079 ** tail, we add 4 bytes to pretend there really is an unformatted
1080 ** node pointer 1080 ** node pointer
1081 */ 1081 */
1082 bytes = 1082 bytes =
1083 ((inode->i_size + 1083 ((inode->i_size +
1084 (blocksize - 1)) >> inode->i_sb->s_blocksize_bits) * UNFM_P_SIZE + 1084 (blocksize - 1)) >> inode->i_sb->s_blocksize_bits) * UNFM_P_SIZE +
1085 sd_size; 1085 sd_size;
1086 return bytes; 1086 return bytes;
1087 } 1087 }
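
To make the rounding concrete (assuming UNFM_P_SIZE, the size of an unformatted node pointer, is 4 bytes): a 10000-byte regular file on a 4096-byte-block filesystem spans three blocks, so real_space_diff() reports 3 * 4 + sd_size bytes of metadata overhead. A sketch of the same math, spelled out:

	/* Illustration only; mirrors real_space_diff() for a regular file. */
	static int example_space_diff(loff_t i_size, int blocksize_bits,
				      int sd_size)
	{
		loff_t blocks = (i_size + (1 << blocksize_bits) - 1)
		    >> blocksize_bits;			/* round up */
		return blocks * UNFM_P_SIZE + sd_size;	/* 3 * 4 + sd_size */
	}
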
1088 1088
1089 static inline loff_t to_real_used_space(struct inode *inode, ulong blocks, 1089 static inline loff_t to_real_used_space(struct inode *inode, ulong blocks,
1090 int sd_size) 1090 int sd_size)
1091 { 1091 {
1092 if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) { 1092 if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) {
1093 return inode->i_size + 1093 return inode->i_size +
1094 (loff_t) (real_space_diff(inode, sd_size)); 1094 (loff_t) (real_space_diff(inode, sd_size));
1095 } 1095 }
1096 return ((loff_t) real_space_diff(inode, sd_size)) + 1096 return ((loff_t) real_space_diff(inode, sd_size)) +
1097 (((loff_t) blocks) << 9); 1097 (((loff_t) blocks) << 9);
1098 } 1098 }
1099 1099
1100 /* Compute number of blocks used by file in ReiserFS counting */ 1100 /* Compute number of blocks used by file in ReiserFS counting */
1101 static inline ulong to_fake_used_blocks(struct inode *inode, int sd_size) 1101 static inline ulong to_fake_used_blocks(struct inode *inode, int sd_size)
1102 { 1102 {
1103 loff_t bytes = inode_get_bytes(inode); 1103 loff_t bytes = inode_get_bytes(inode);
1104 loff_t real_space = real_space_diff(inode, sd_size); 1104 loff_t real_space = real_space_diff(inode, sd_size);
1105 1105
1106 /* keeps fsck and non-quota versions of reiserfs happy */ 1106 /* keeps fsck and non-quota versions of reiserfs happy */
1107 if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) { 1107 if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) {
1108 bytes += (loff_t) 511; 1108 bytes += (loff_t) 511;
1109 } 1109 }
1110 1110
1111 /* files from before the quota patch might have i_blocks such that 1111 /* files from before the quota patch might have i_blocks such that
1112 ** bytes < real_space. Deal with that here to prevent it from 1112 ** bytes < real_space. Deal with that here to prevent it from
1113 ** going negative. 1113 ** going negative.
1114 */ 1114 */
1115 if (bytes < real_space) 1115 if (bytes < real_space)
1116 return 0; 1116 return 0;
1117 return (bytes - real_space) >> 9; 1117 return (bytes - real_space) >> 9;
1118 } 1118 }
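
For example (made-up numbers): if inode_get_bytes() reports 12344 and real_space_diff() is 56, the stat data records (12344 - 56) >> 9 == 24 sectors. The early return above keeps a pre-quota-patch inode whose byte count fell below real_space from underflowing into a huge unsigned value:

	/* Illustration only: why the clamp matters. */
	loff_t bytes = 12344, real_space = 56;
	ulong fake_blocks = (bytes < real_space)
	    ? 0 : (bytes - real_space) >> 9;	/* == 24 here */
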
1119 1119
1120 // 1120 //
1121 // BAD: new directories have stat data of new type and all other items 1121 // BAD: new directories have stat data of new type and all other items
1122 // of old type. The version stored in the inode describes the body items, so 1122 // of old type. The version stored in the inode describes the body items, so
1123 // in update_stat_data we cannot rely on the inode, but have to check the 1123 // in update_stat_data we cannot rely on the inode, but have to check the
1124 // item version directly 1124 // item version directly
1125 // 1125 //
1126 1126
1127 // called by read_locked_inode 1127 // called by read_locked_inode
1128 static void init_inode(struct inode *inode, struct path *path) 1128 static void init_inode(struct inode *inode, struct path *path)
1129 { 1129 {
1130 struct buffer_head *bh; 1130 struct buffer_head *bh;
1131 struct item_head *ih; 1131 struct item_head *ih;
1132 __u32 rdev; 1132 __u32 rdev;
1133 //int version = ITEM_VERSION_1; 1133 //int version = ITEM_VERSION_1;
1134 1134
1135 bh = PATH_PLAST_BUFFER(path); 1135 bh = PATH_PLAST_BUFFER(path);
1136 ih = PATH_PITEM_HEAD(path); 1136 ih = PATH_PITEM_HEAD(path);
1137 1137
1138 copy_key(INODE_PKEY(inode), &(ih->ih_key)); 1138 copy_key(INODE_PKEY(inode), &(ih->ih_key));
1139 inode->i_blksize = reiserfs_default_io_size; 1139 inode->i_blksize = reiserfs_default_io_size;
1140 1140
1141 INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list)); 1141 INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list));
1142 REISERFS_I(inode)->i_flags = 0; 1142 REISERFS_I(inode)->i_flags = 0;
1143 REISERFS_I(inode)->i_prealloc_block = 0; 1143 REISERFS_I(inode)->i_prealloc_block = 0;
1144 REISERFS_I(inode)->i_prealloc_count = 0; 1144 REISERFS_I(inode)->i_prealloc_count = 0;
1145 REISERFS_I(inode)->i_trans_id = 0; 1145 REISERFS_I(inode)->i_trans_id = 0;
1146 REISERFS_I(inode)->i_jl = NULL; 1146 REISERFS_I(inode)->i_jl = NULL;
1147 REISERFS_I(inode)->i_acl_access = NULL; 1147 REISERFS_I(inode)->i_acl_access = NULL;
1148 REISERFS_I(inode)->i_acl_default = NULL; 1148 REISERFS_I(inode)->i_acl_default = NULL;
1149 init_rwsem(&REISERFS_I(inode)->xattr_sem); 1149 init_rwsem(&REISERFS_I(inode)->xattr_sem);
1150 1150
1151 if (stat_data_v1(ih)) { 1151 if (stat_data_v1(ih)) {
1152 struct stat_data_v1 *sd = 1152 struct stat_data_v1 *sd =
1153 (struct stat_data_v1 *)B_I_PITEM(bh, ih); 1153 (struct stat_data_v1 *)B_I_PITEM(bh, ih);
1154 unsigned long blocks; 1154 unsigned long blocks;
1155 1155
1156 set_inode_item_key_version(inode, KEY_FORMAT_3_5); 1156 set_inode_item_key_version(inode, KEY_FORMAT_3_5);
1157 set_inode_sd_version(inode, STAT_DATA_V1); 1157 set_inode_sd_version(inode, STAT_DATA_V1);
1158 inode->i_mode = sd_v1_mode(sd); 1158 inode->i_mode = sd_v1_mode(sd);
1159 inode->i_nlink = sd_v1_nlink(sd); 1159 inode->i_nlink = sd_v1_nlink(sd);
1160 inode->i_uid = sd_v1_uid(sd); 1160 inode->i_uid = sd_v1_uid(sd);
1161 inode->i_gid = sd_v1_gid(sd); 1161 inode->i_gid = sd_v1_gid(sd);
1162 inode->i_size = sd_v1_size(sd); 1162 inode->i_size = sd_v1_size(sd);
1163 inode->i_atime.tv_sec = sd_v1_atime(sd); 1163 inode->i_atime.tv_sec = sd_v1_atime(sd);
1164 inode->i_mtime.tv_sec = sd_v1_mtime(sd); 1164 inode->i_mtime.tv_sec = sd_v1_mtime(sd);
1165 inode->i_ctime.tv_sec = sd_v1_ctime(sd); 1165 inode->i_ctime.tv_sec = sd_v1_ctime(sd);
1166 inode->i_atime.tv_nsec = 0; 1166 inode->i_atime.tv_nsec = 0;
1167 inode->i_ctime.tv_nsec = 0; 1167 inode->i_ctime.tv_nsec = 0;
1168 inode->i_mtime.tv_nsec = 0; 1168 inode->i_mtime.tv_nsec = 0;
1169 1169
1170 inode->i_blocks = sd_v1_blocks(sd); 1170 inode->i_blocks = sd_v1_blocks(sd);
1171 inode->i_generation = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); 1171 inode->i_generation = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
1172 blocks = (inode->i_size + 511) >> 9; 1172 blocks = (inode->i_size + 511) >> 9;
1173 blocks = _ROUND_UP(blocks, inode->i_sb->s_blocksize >> 9); 1173 blocks = _ROUND_UP(blocks, inode->i_sb->s_blocksize >> 9);
1174 if (inode->i_blocks > blocks) { 1174 if (inode->i_blocks > blocks) {
1175 // there was a bug in <=3.5.23 when i_blocks could take negative 1175 // there was a bug in <=3.5.23 when i_blocks could take negative
1176 // values. Starting from 3.5.17 this value could even be stored in 1176 // values. Starting from 3.5.17 this value could even be stored in
1177 // stat data. For such files we set i_blocks based on file 1177 // stat data. For such files we set i_blocks based on file
1178 // size. Two notes: this can be wrong for sparse files, and the on-disk value will 1178 // size. Two notes: this can be wrong for sparse files, and the on-disk value will
1179 // only be updated if the file's inode ever changes 1179 // only be updated if the file's inode ever changes
1180 inode->i_blocks = blocks; 1180 inode->i_blocks = blocks;
1181 } 1181 }
1182 1182
1183 rdev = sd_v1_rdev(sd); 1183 rdev = sd_v1_rdev(sd);
1184 REISERFS_I(inode)->i_first_direct_byte = 1184 REISERFS_I(inode)->i_first_direct_byte =
1185 sd_v1_first_direct_byte(sd); 1185 sd_v1_first_direct_byte(sd);
1186 /* an early bug in the quota code can give us an odd number for the 1186 /* an early bug in the quota code can give us an odd number for the
1187 ** block count. This is incorrect, fix it here. 1187 ** block count. This is incorrect, fix it here.
1188 */ 1188 */
1189 if (inode->i_blocks & 1) { 1189 if (inode->i_blocks & 1) {
1190 inode->i_blocks++; 1190 inode->i_blocks++;
1191 } 1191 }
1192 inode_set_bytes(inode, 1192 inode_set_bytes(inode,
1193 to_real_used_space(inode, inode->i_blocks, 1193 to_real_used_space(inode, inode->i_blocks,
1194 SD_V1_SIZE)); 1194 SD_V1_SIZE));
1195 /* nopack is initially zero for v1 objects. For v2 objects, 1195 /* nopack is initially zero for v1 objects. For v2 objects,
1196 nopack is initialised from sd_attrs */ 1196 nopack is initialised from sd_attrs */
1197 REISERFS_I(inode)->i_flags &= ~i_nopack_mask; 1197 REISERFS_I(inode)->i_flags &= ~i_nopack_mask;
1198 } else { 1198 } else {
1199 // new stat data found, but object may have old items 1199 // new stat data found, but object may have old items
1200 // (directories and symlinks) 1200 // (directories and symlinks)
1201 struct stat_data *sd = (struct stat_data *)B_I_PITEM(bh, ih); 1201 struct stat_data *sd = (struct stat_data *)B_I_PITEM(bh, ih);
1202 1202
1203 inode->i_mode = sd_v2_mode(sd); 1203 inode->i_mode = sd_v2_mode(sd);
1204 inode->i_nlink = sd_v2_nlink(sd); 1204 inode->i_nlink = sd_v2_nlink(sd);
1205 inode->i_uid = sd_v2_uid(sd); 1205 inode->i_uid = sd_v2_uid(sd);
1206 inode->i_size = sd_v2_size(sd); 1206 inode->i_size = sd_v2_size(sd);
1207 inode->i_gid = sd_v2_gid(sd); 1207 inode->i_gid = sd_v2_gid(sd);
1208 inode->i_mtime.tv_sec = sd_v2_mtime(sd); 1208 inode->i_mtime.tv_sec = sd_v2_mtime(sd);
1209 inode->i_atime.tv_sec = sd_v2_atime(sd); 1209 inode->i_atime.tv_sec = sd_v2_atime(sd);
1210 inode->i_ctime.tv_sec = sd_v2_ctime(sd); 1210 inode->i_ctime.tv_sec = sd_v2_ctime(sd);
1211 inode->i_ctime.tv_nsec = 0; 1211 inode->i_ctime.tv_nsec = 0;
1212 inode->i_mtime.tv_nsec = 0; 1212 inode->i_mtime.tv_nsec = 0;
1213 inode->i_atime.tv_nsec = 0; 1213 inode->i_atime.tv_nsec = 0;
1214 inode->i_blocks = sd_v2_blocks(sd); 1214 inode->i_blocks = sd_v2_blocks(sd);
1215 rdev = sd_v2_rdev(sd); 1215 rdev = sd_v2_rdev(sd);
1216 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) 1216 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
1217 inode->i_generation = 1217 inode->i_generation =
1218 le32_to_cpu(INODE_PKEY(inode)->k_dir_id); 1218 le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
1219 else 1219 else
1220 inode->i_generation = sd_v2_generation(sd); 1220 inode->i_generation = sd_v2_generation(sd);
1221 1221
1222 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) 1222 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
1223 set_inode_item_key_version(inode, KEY_FORMAT_3_5); 1223 set_inode_item_key_version(inode, KEY_FORMAT_3_5);
1224 else 1224 else
1225 set_inode_item_key_version(inode, KEY_FORMAT_3_6); 1225 set_inode_item_key_version(inode, KEY_FORMAT_3_6);
1226 REISERFS_I(inode)->i_first_direct_byte = 0; 1226 REISERFS_I(inode)->i_first_direct_byte = 0;
1227 set_inode_sd_version(inode, STAT_DATA_V2); 1227 set_inode_sd_version(inode, STAT_DATA_V2);
1228 inode_set_bytes(inode, 1228 inode_set_bytes(inode,
1229 to_real_used_space(inode, inode->i_blocks, 1229 to_real_used_space(inode, inode->i_blocks,
1230 SD_V2_SIZE)); 1230 SD_V2_SIZE));
1231 /* read persistent inode attributes from sd and initialise 1231 /* read persistent inode attributes from sd and initialise
1232 generic inode flags from them */ 1232 generic inode flags from them */
1233 REISERFS_I(inode)->i_attrs = sd_v2_attrs(sd); 1233 REISERFS_I(inode)->i_attrs = sd_v2_attrs(sd);
1234 sd_attrs_to_i_attrs(sd_v2_attrs(sd), inode); 1234 sd_attrs_to_i_attrs(sd_v2_attrs(sd), inode);
1235 } 1235 }
1236 1236
1237 pathrelse(path); 1237 pathrelse(path);
1238 if (S_ISREG(inode->i_mode)) { 1238 if (S_ISREG(inode->i_mode)) {
1239 inode->i_op = &reiserfs_file_inode_operations; 1239 inode->i_op = &reiserfs_file_inode_operations;
1240 inode->i_fop = &reiserfs_file_operations; 1240 inode->i_fop = &reiserfs_file_operations;
1241 inode->i_mapping->a_ops = &reiserfs_address_space_operations; 1241 inode->i_mapping->a_ops = &reiserfs_address_space_operations;
1242 } else if (S_ISDIR(inode->i_mode)) { 1242 } else if (S_ISDIR(inode->i_mode)) {
1243 inode->i_op = &reiserfs_dir_inode_operations; 1243 inode->i_op = &reiserfs_dir_inode_operations;
1244 inode->i_fop = &reiserfs_dir_operations; 1244 inode->i_fop = &reiserfs_dir_operations;
1245 } else if (S_ISLNK(inode->i_mode)) { 1245 } else if (S_ISLNK(inode->i_mode)) {
1246 inode->i_op = &reiserfs_symlink_inode_operations; 1246 inode->i_op = &reiserfs_symlink_inode_operations;
1247 inode->i_mapping->a_ops = &reiserfs_address_space_operations; 1247 inode->i_mapping->a_ops = &reiserfs_address_space_operations;
1248 } else { 1248 } else {
1249 inode->i_blocks = 0; 1249 inode->i_blocks = 0;
1250 inode->i_op = &reiserfs_special_inode_operations; 1250 inode->i_op = &reiserfs_special_inode_operations;
1251 init_special_inode(inode, inode->i_mode, new_decode_dev(rdev)); 1251 init_special_inode(inode, inode->i_mode, new_decode_dev(rdev));
1252 } 1252 }
1253 } 1253 }
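
The "BAD" note above init_inode() is the key to the version logic here: the inode's key version describes only the body items, and new-format directories and symlinks still carry 3.5-format bodies. Condensed from the branches above (no new logic):

	if (stat_data_v1(ih))
		set_inode_item_key_version(inode, KEY_FORMAT_3_5);
	else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
		set_inode_item_key_version(inode, KEY_FORMAT_3_5);
	else
		set_inode_item_key_version(inode, KEY_FORMAT_3_6);
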
1254 1254
1255 // update new stat data with inode fields 1255 // update new stat data with inode fields
1256 static void inode2sd(void *sd, struct inode *inode, loff_t size) 1256 static void inode2sd(void *sd, struct inode *inode, loff_t size)
1257 { 1257 {
1258 struct stat_data *sd_v2 = (struct stat_data *)sd; 1258 struct stat_data *sd_v2 = (struct stat_data *)sd;
1259 __u16 flags; 1259 __u16 flags;
1260 1260
1261 set_sd_v2_mode(sd_v2, inode->i_mode); 1261 set_sd_v2_mode(sd_v2, inode->i_mode);
1262 set_sd_v2_nlink(sd_v2, inode->i_nlink); 1262 set_sd_v2_nlink(sd_v2, inode->i_nlink);
1263 set_sd_v2_uid(sd_v2, inode->i_uid); 1263 set_sd_v2_uid(sd_v2, inode->i_uid);
1264 set_sd_v2_size(sd_v2, size); 1264 set_sd_v2_size(sd_v2, size);
1265 set_sd_v2_gid(sd_v2, inode->i_gid); 1265 set_sd_v2_gid(sd_v2, inode->i_gid);
1266 set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec); 1266 set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec);
1267 set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec); 1267 set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec);
1268 set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec); 1268 set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec);
1269 set_sd_v2_blocks(sd_v2, to_fake_used_blocks(inode, SD_V2_SIZE)); 1269 set_sd_v2_blocks(sd_v2, to_fake_used_blocks(inode, SD_V2_SIZE));
1270 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) 1270 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
1271 set_sd_v2_rdev(sd_v2, new_encode_dev(inode->i_rdev)); 1271 set_sd_v2_rdev(sd_v2, new_encode_dev(inode->i_rdev));
1272 else 1272 else
1273 set_sd_v2_generation(sd_v2, inode->i_generation); 1273 set_sd_v2_generation(sd_v2, inode->i_generation);
1274 flags = REISERFS_I(inode)->i_attrs; 1274 flags = REISERFS_I(inode)->i_attrs;
1275 i_attrs_to_sd_attrs(inode, &flags); 1275 i_attrs_to_sd_attrs(inode, &flags);
1276 set_sd_v2_attrs(sd_v2, flags); 1276 set_sd_v2_attrs(sd_v2, flags);
1277 } 1277 }
1278 1278
1279 // used to copy inode's fields to old stat data 1279 // used to copy inode's fields to old stat data
1280 static void inode2sd_v1(void *sd, struct inode *inode, loff_t size) 1280 static void inode2sd_v1(void *sd, struct inode *inode, loff_t size)
1281 { 1281 {
1282 struct stat_data_v1 *sd_v1 = (struct stat_data_v1 *)sd; 1282 struct stat_data_v1 *sd_v1 = (struct stat_data_v1 *)sd;
1283 1283
1284 set_sd_v1_mode(sd_v1, inode->i_mode); 1284 set_sd_v1_mode(sd_v1, inode->i_mode);
1285 set_sd_v1_uid(sd_v1, inode->i_uid); 1285 set_sd_v1_uid(sd_v1, inode->i_uid);
1286 set_sd_v1_gid(sd_v1, inode->i_gid); 1286 set_sd_v1_gid(sd_v1, inode->i_gid);
1287 set_sd_v1_nlink(sd_v1, inode->i_nlink); 1287 set_sd_v1_nlink(sd_v1, inode->i_nlink);
1288 set_sd_v1_size(sd_v1, size); 1288 set_sd_v1_size(sd_v1, size);
1289 set_sd_v1_atime(sd_v1, inode->i_atime.tv_sec); 1289 set_sd_v1_atime(sd_v1, inode->i_atime.tv_sec);
1290 set_sd_v1_ctime(sd_v1, inode->i_ctime.tv_sec); 1290 set_sd_v1_ctime(sd_v1, inode->i_ctime.tv_sec);
1291 set_sd_v1_mtime(sd_v1, inode->i_mtime.tv_sec); 1291 set_sd_v1_mtime(sd_v1, inode->i_mtime.tv_sec);
1292 1292
1293 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) 1293 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
1294 set_sd_v1_rdev(sd_v1, new_encode_dev(inode->i_rdev)); 1294 set_sd_v1_rdev(sd_v1, new_encode_dev(inode->i_rdev));
1295 else 1295 else
1296 set_sd_v1_blocks(sd_v1, to_fake_used_blocks(inode, SD_V1_SIZE)); 1296 set_sd_v1_blocks(sd_v1, to_fake_used_blocks(inode, SD_V1_SIZE));
1297 1297
1298 // Sigh. i_first_direct_byte is back 1298 // Sigh. i_first_direct_byte is back
1299 set_sd_v1_first_direct_byte(sd_v1, 1299 set_sd_v1_first_direct_byte(sd_v1,
1300 REISERFS_I(inode)->i_first_direct_byte); 1300 REISERFS_I(inode)->i_first_direct_byte);
1301 } 1301 }
1302 1302
1303 /* NOTE, you must prepare the buffer head before sending it here, 1303 /* NOTE, you must prepare the buffer head before sending it here,
1304 ** and then log it after the call 1304 ** and then log it after the call
1305 */ 1305 */
1306 static void update_stat_data(struct path *path, struct inode *inode, 1306 static void update_stat_data(struct path *path, struct inode *inode,
1307 loff_t size) 1307 loff_t size)
1308 { 1308 {
1309 struct buffer_head *bh; 1309 struct buffer_head *bh;
1310 struct item_head *ih; 1310 struct item_head *ih;
1311 1311
1312 bh = PATH_PLAST_BUFFER(path); 1312 bh = PATH_PLAST_BUFFER(path);
1313 ih = PATH_PITEM_HEAD(path); 1313 ih = PATH_PITEM_HEAD(path);
1314 1314
1315 if (!is_statdata_le_ih(ih)) 1315 if (!is_statdata_le_ih(ih))
1316 reiserfs_panic(inode->i_sb, 1316 reiserfs_panic(inode->i_sb,
1317 "vs-13065: update_stat_data: key %k, found item %h", 1317 "vs-13065: update_stat_data: key %k, found item %h",
1318 INODE_PKEY(inode), ih); 1318 INODE_PKEY(inode), ih);
1319 1319
1320 if (stat_data_v1(ih)) { 1320 if (stat_data_v1(ih)) {
1321 // path points to old stat data 1321 // path points to old stat data
1322 inode2sd_v1(B_I_PITEM(bh, ih), inode, size); 1322 inode2sd_v1(B_I_PITEM(bh, ih), inode, size);
1323 } else { 1323 } else {
1324 inode2sd(B_I_PITEM(bh, ih), inode, size); 1324 inode2sd(B_I_PITEM(bh, ih), inode, size);
1325 } 1325 }
1326 1326
1327 return; 1327 return;
1328 } 1328 }
1329 1329
1330 void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th, 1330 void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th,
1331 struct inode *inode, loff_t size) 1331 struct inode *inode, loff_t size)
1332 { 1332 {
1333 struct cpu_key key; 1333 struct cpu_key key;
1334 INITIALIZE_PATH(path); 1334 INITIALIZE_PATH(path);
1335 struct buffer_head *bh; 1335 struct buffer_head *bh;
1336 int fs_gen; 1336 int fs_gen;
1337 struct item_head *ih, tmp_ih; 1337 struct item_head *ih, tmp_ih;
1338 int retval; 1338 int retval;
1339 1339
1340 BUG_ON(!th->t_trans_id); 1340 BUG_ON(!th->t_trans_id);
1341 1341
1342 make_cpu_key(&key, inode, SD_OFFSET, TYPE_STAT_DATA, 3); //key type is unimportant 1342 make_cpu_key(&key, inode, SD_OFFSET, TYPE_STAT_DATA, 3); //key type is unimportant
1343 1343
1344 for (;;) { 1344 for (;;) {
1345 int pos; 1345 int pos;
1346 /* look for the object's stat data */ 1346 /* look for the object's stat data */
1347 retval = search_item(inode->i_sb, &key, &path); 1347 retval = search_item(inode->i_sb, &key, &path);
1348 if (retval == IO_ERROR) { 1348 if (retval == IO_ERROR) {
1349 reiserfs_warning(inode->i_sb, 1349 reiserfs_warning(inode->i_sb,
1350 "vs-13050: reiserfs_update_sd: " 1350 "vs-13050: reiserfs_update_sd: "
1351 "i/o failure occurred trying to update %K stat data", 1351 "i/o failure occurred trying to update %K stat data",
1352 &key); 1352 &key);
1353 return; 1353 return;
1354 } 1354 }
1355 if (retval == ITEM_NOT_FOUND) { 1355 if (retval == ITEM_NOT_FOUND) {
1356 pos = PATH_LAST_POSITION(&path); 1356 pos = PATH_LAST_POSITION(&path);
1357 pathrelse(&path); 1357 pathrelse(&path);
1358 if (inode->i_nlink == 0) { 1358 if (inode->i_nlink == 0) {
1359 /*reiserfs_warning (inode->i_sb, "vs-13050: reiserfs_update_sd: i_nlink == 0, stat data not found"); */ 1359 /*reiserfs_warning (inode->i_sb, "vs-13050: reiserfs_update_sd: i_nlink == 0, stat data not found"); */
1360 return; 1360 return;
1361 } 1361 }
1362 reiserfs_warning(inode->i_sb, 1362 reiserfs_warning(inode->i_sb,
1363 "vs-13060: reiserfs_update_sd: " 1363 "vs-13060: reiserfs_update_sd: "
1364 "stat data of object %k (nlink == %d) not found (pos %d)", 1364 "stat data of object %k (nlink == %d) not found (pos %d)",
1365 INODE_PKEY(inode), inode->i_nlink, 1365 INODE_PKEY(inode), inode->i_nlink,
1366 pos); 1366 pos);
1367 reiserfs_check_path(&path); 1367 reiserfs_check_path(&path);
1368 return; 1368 return;
1369 } 1369 }
1370 1370
1371 /* sigh, prepare_for_journal might schedule. When it schedules the 1371 /* sigh, prepare_for_journal might schedule. When it schedules the
1372 ** FS might change. We have to detect that, and loop back to the 1372 ** FS might change. We have to detect that, and loop back to the
1373 ** search if the stat data item has moved 1373 ** search if the stat data item has moved
1374 */ 1374 */
1375 bh = get_last_bh(&path); 1375 bh = get_last_bh(&path);
1376 ih = get_ih(&path); 1376 ih = get_ih(&path);
1377 copy_item_head(&tmp_ih, ih); 1377 copy_item_head(&tmp_ih, ih);
1378 fs_gen = get_generation(inode->i_sb); 1378 fs_gen = get_generation(inode->i_sb);
1379 reiserfs_prepare_for_journal(inode->i_sb, bh, 1); 1379 reiserfs_prepare_for_journal(inode->i_sb, bh, 1);
1380 if (fs_changed(fs_gen, inode->i_sb) 1380 if (fs_changed(fs_gen, inode->i_sb)
1381 && item_moved(&tmp_ih, &path)) { 1381 && item_moved(&tmp_ih, &path)) {
1382 reiserfs_restore_prepared_buffer(inode->i_sb, bh); 1382 reiserfs_restore_prepared_buffer(inode->i_sb, bh);
1383 continue; /* Stat_data item has been moved after scheduling. */ 1383 continue; /* Stat_data item has been moved after scheduling. */
1384 } 1384 }
1385 break; 1385 break;
1386 } 1386 }
1387 update_stat_data(&path, inode, size); 1387 update_stat_data(&path, inode, size);
1388 journal_mark_dirty(th, th->t_super, bh); 1388 journal_mark_dirty(th, th->t_super, bh);
1389 pathrelse(&path); 1389 pathrelse(&path);
1390 return; 1390 return;
1391 } 1391 }
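
The loop above is the standard reiserfs prepare-for-journal retry: reiserfs_prepare_for_journal() may schedule, so the caller snapshots the item head and the filesystem generation first, and loops back to the search if the item moved in the meantime. Distilled, with the error and not-found paths elided:

	for (;;) {
		search_item(inode->i_sb, &key, &path);	/* error paths elided */
		bh = get_last_bh(&path);
		copy_item_head(&tmp_ih, get_ih(&path));
		fs_gen = get_generation(inode->i_sb);
		reiserfs_prepare_for_journal(inode->i_sb, bh, 1); /* may sleep */
		if (fs_changed(fs_gen, inode->i_sb) && item_moved(&tmp_ih, &path)) {
			reiserfs_restore_prepared_buffer(inode->i_sb, bh);
			continue;	/* item moved while we slept */
		}
		break;
	}
	update_stat_data(&path, inode, size);		/* modify prepared buffer */
	journal_mark_dirty(th, th->t_super, bh);	/* then log it */
	pathrelse(&path);
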
1392 1392
1393 /* reiserfs_read_locked_inode is called to read the inode off disk, and it 1393 /* reiserfs_read_locked_inode is called to read the inode off disk, and it
1394 ** does a make_bad_inode when things go wrong. But we need to make sure 1394 ** does a make_bad_inode when things go wrong. But we need to make sure
1395 ** to clear the key in the private portion of the inode, otherwise a 1395 ** to clear the key in the private portion of the inode, otherwise a
1396 ** corresponding iput might try to delete whatever object the inode last 1396 ** corresponding iput might try to delete whatever object the inode last
1397 ** represented. 1397 ** represented.
1398 */ 1398 */
1399 static void reiserfs_make_bad_inode(struct inode *inode) 1399 static void reiserfs_make_bad_inode(struct inode *inode)
1400 { 1400 {
1401 memset(INODE_PKEY(inode), 0, KEY_SIZE); 1401 memset(INODE_PKEY(inode), 0, KEY_SIZE);
1402 make_bad_inode(inode); 1402 make_bad_inode(inode);
1403 } 1403 }
1404 1404
1405 // 1405 //
1406 // initially this function was derived from minix or ext2's analog and 1406 // initially this function was derived from minix or ext2's analog and
1407 // evolved as the prototype did 1407 // evolved as the prototype did
1408 // 1408 //
1409 1409
1410 int reiserfs_init_locked_inode(struct inode *inode, void *p) 1410 int reiserfs_init_locked_inode(struct inode *inode, void *p)
1411 { 1411 {
1412 struct reiserfs_iget_args *args = (struct reiserfs_iget_args *)p; 1412 struct reiserfs_iget_args *args = (struct reiserfs_iget_args *)p;
1413 inode->i_ino = args->objectid; 1413 inode->i_ino = args->objectid;
1414 INODE_PKEY(inode)->k_dir_id = cpu_to_le32(args->dirid); 1414 INODE_PKEY(inode)->k_dir_id = cpu_to_le32(args->dirid);
1415 return 0; 1415 return 0;
1416 } 1416 }
1417 1417
1418 /* looks for stat data in the tree, and fills up the fields of in-core 1418 /* looks for stat data in the tree, and fills up the fields of in-core
1419 inode stat data fields */ 1419 inode stat data fields */
1420 void reiserfs_read_locked_inode(struct inode *inode, 1420 void reiserfs_read_locked_inode(struct inode *inode,
1421 struct reiserfs_iget_args *args) 1421 struct reiserfs_iget_args *args)
1422 { 1422 {
1423 INITIALIZE_PATH(path_to_sd); 1423 INITIALIZE_PATH(path_to_sd);
1424 struct cpu_key key; 1424 struct cpu_key key;
1425 unsigned long dirino; 1425 unsigned long dirino;
1426 int retval; 1426 int retval;
1427 1427
1428 dirino = args->dirid; 1428 dirino = args->dirid;
1429 1429
1430 /* set version 1; version 2 could be used too, because the stat data 1430 /* set version 1; version 2 could be used too, because the stat data
1431 key is the same in both versions */ 1431 key is the same in both versions */
1432 key.version = KEY_FORMAT_3_5; 1432 key.version = KEY_FORMAT_3_5;
1433 key.on_disk_key.k_dir_id = dirino; 1433 key.on_disk_key.k_dir_id = dirino;
1434 key.on_disk_key.k_objectid = inode->i_ino; 1434 key.on_disk_key.k_objectid = inode->i_ino;
1435 key.on_disk_key.k_offset = 0; 1435 key.on_disk_key.k_offset = 0;
1436 key.on_disk_key.k_type = 0; 1436 key.on_disk_key.k_type = 0;
1437 1437
1438 /* look for the object's stat data */ 1438 /* look for the object's stat data */
1439 retval = search_item(inode->i_sb, &key, &path_to_sd); 1439 retval = search_item(inode->i_sb, &key, &path_to_sd);
1440 if (retval == IO_ERROR) { 1440 if (retval == IO_ERROR) {
1441 reiserfs_warning(inode->i_sb, 1441 reiserfs_warning(inode->i_sb,
1442 "vs-13070: reiserfs_read_locked_inode: " 1442 "vs-13070: reiserfs_read_locked_inode: "
1443 "i/o failure occurred trying to find stat data of %K", 1443 "i/o failure occurred trying to find stat data of %K",
1444 &key); 1444 &key);
1445 reiserfs_make_bad_inode(inode); 1445 reiserfs_make_bad_inode(inode);
1446 return; 1446 return;
1447 } 1447 }
1448 if (retval != ITEM_FOUND) { 1448 if (retval != ITEM_FOUND) {
1449 /* a stale NFS handle can trigger this without it being an error */ 1449 /* a stale NFS handle can trigger this without it being an error */
1450 pathrelse(&path_to_sd); 1450 pathrelse(&path_to_sd);
1451 reiserfs_make_bad_inode(inode); 1451 reiserfs_make_bad_inode(inode);
1452 inode->i_nlink = 0; 1452 inode->i_nlink = 0;
1453 return; 1453 return;
1454 } 1454 }
1455 1455
1456 init_inode(inode, &path_to_sd); 1456 init_inode(inode, &path_to_sd);
1457 1457
1458 /* It is possible that knfsd is trying to access inode of a file 1458 /* It is possible that knfsd is trying to access inode of a file
1459 that is being removed from the disk by some other thread. As we 1459 that is being removed from the disk by some other thread. As we
1460 update sd on unlink, all that is required is to check for nlink 1460 update sd on unlink, all that is required is to check for nlink
1461 here. This bug was first found by Sizif when debugging 1461 here. This bug was first found by Sizif when debugging
1462 SquidNG/Butterfly, forgotten, and found again after Philippe 1462 SquidNG/Butterfly, forgotten, and found again after Philippe
1463 Gramoulle <philippe.gramoulle@mmania.com> reproduced it. 1463 Gramoulle <philippe.gramoulle@mmania.com> reproduced it.
1464 1464
1465 A more logical fix would require changes in fs/inode.c:iput() to 1465 A more logical fix would require changes in fs/inode.c:iput() to
1466 remove inode from hash-table _after_ fs cleaned disk stuff up and 1466 remove inode from hash-table _after_ fs cleaned disk stuff up and
1467 in iget() to return NULL if I_FREEING inode is found in 1467 in iget() to return NULL if I_FREEING inode is found in
1468 hash-table. */ 1468 hash-table. */
1469 /* Currently there is one place where it's ok to meet inode with 1469 /* Currently there is one place where it's ok to meet inode with
1470 nlink==0: processing of open-unlinked and half-truncated files 1470 nlink==0: processing of open-unlinked and half-truncated files
1471 during mount (fs/reiserfs/super.c:finish_unfinished()). */ 1471 during mount (fs/reiserfs/super.c:finish_unfinished()). */
1472 if ((inode->i_nlink == 0) && 1472 if ((inode->i_nlink == 0) &&
1473 !REISERFS_SB(inode->i_sb)->s_is_unlinked_ok) { 1473 !REISERFS_SB(inode->i_sb)->s_is_unlinked_ok) {
1474 reiserfs_warning(inode->i_sb, 1474 reiserfs_warning(inode->i_sb,
1475 "vs-13075: reiserfs_read_locked_inode: " 1475 "vs-13075: reiserfs_read_locked_inode: "
1476 "dead inode read from disk %K. " 1476 "dead inode read from disk %K. "
1477 "This is likely to be race with knfsd. Ignore", 1477 "This is likely to be race with knfsd. Ignore",
1478 &key); 1478 &key);
1479 reiserfs_make_bad_inode(inode); 1479 reiserfs_make_bad_inode(inode);
1480 } 1480 }
1481 1481
1482 reiserfs_check_path(&path_to_sd); /* init_inode() should have released the path */ 1482 reiserfs_check_path(&path_to_sd); /* init_inode() should have released the path */
1483 1483
1484 } 1484 }
1485 1485
1486 /** 1486 /**
1487 * reiserfs_find_actor() - "find actor" reiserfs supplies to iget5_locked(). 1487 * reiserfs_find_actor() - "find actor" reiserfs supplies to iget5_locked().
1488 * 1488 *
1489 * @inode: inode from hash table to check 1489 * @inode: inode from hash table to check
1490 * @opaque: "cookie" passed to iget5_locked(). This is &reiserfs_iget_args. 1490 * @opaque: "cookie" passed to iget5_locked(). This is &reiserfs_iget_args.
1491 * 1491 *
1492 * This function is called by iget5_locked() to distinguish reiserfs inodes 1492 * This function is called by iget5_locked() to distinguish reiserfs inodes
1493 * having the same inode numbers. Such inodes can only exist due to some 1493 * having the same inode numbers. Such inodes can only exist due to some
1494 * error condition. One of them should be bad. Inodes with identical 1494 * error condition. One of them should be bad. Inodes with identical
1495 * inode numbers (objectids) are distinguished by parent directory ids. 1495 * inode numbers (objectids) are distinguished by parent directory ids.
1496 * 1496 *
1497 */ 1497 */
1498 int reiserfs_find_actor(struct inode *inode, void *opaque) 1498 int reiserfs_find_actor(struct inode *inode, void *opaque)
1499 { 1499 {
1500 struct reiserfs_iget_args *args; 1500 struct reiserfs_iget_args *args;
1501 1501
1502 args = opaque; 1502 args = opaque;
1503 /* args is already in CPU order */ 1503 /* args is already in CPU order */
1504 return (inode->i_ino == args->objectid) && 1504 return (inode->i_ino == args->objectid) &&
1505 (le32_to_cpu(INODE_PKEY(inode)->k_dir_id) == args->dirid); 1505 (le32_to_cpu(INODE_PKEY(inode)->k_dir_id) == args->dirid);
1506 } 1506 }
1507 1507
1508 struct inode *reiserfs_iget(struct super_block *s, const struct cpu_key *key) 1508 struct inode *reiserfs_iget(struct super_block *s, const struct cpu_key *key)
1509 { 1509 {
1510 struct inode *inode; 1510 struct inode *inode;
1511 struct reiserfs_iget_args args; 1511 struct reiserfs_iget_args args;
1512 1512
1513 args.objectid = key->on_disk_key.k_objectid; 1513 args.objectid = key->on_disk_key.k_objectid;
1514 args.dirid = key->on_disk_key.k_dir_id; 1514 args.dirid = key->on_disk_key.k_dir_id;
1515 inode = iget5_locked(s, key->on_disk_key.k_objectid, 1515 inode = iget5_locked(s, key->on_disk_key.k_objectid,
1516 reiserfs_find_actor, reiserfs_init_locked_inode, 1516 reiserfs_find_actor, reiserfs_init_locked_inode,
1517 (void *)(&args)); 1517 (void *)(&args));
1518 if (!inode) 1518 if (!inode)
1519 return ERR_PTR(-ENOMEM); 1519 return ERR_PTR(-ENOMEM);
1520 1520
1521 if (inode->i_state & I_NEW) { 1521 if (inode->i_state & I_NEW) {
1522 reiserfs_read_locked_inode(inode, &args); 1522 reiserfs_read_locked_inode(inode, &args);
1523 unlock_new_inode(inode); 1523 unlock_new_inode(inode);
1524 } 1524 }
1525 1525
1526 if (comp_short_keys(INODE_PKEY(inode), key) || is_bad_inode(inode)) { 1526 if (comp_short_keys(INODE_PKEY(inode), key) || is_bad_inode(inode)) {
1527 /* either due to i/o error or a stale NFS handle */ 1527 /* either due to i/o error or a stale NFS handle */
1528 iput(inode); 1528 iput(inode);
1529 inode = NULL; 1529 inode = NULL;
1530 } 1530 }
1531 return inode; 1531 return inode;
1532 } 1532 }
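
A hedged caller sketch for reiserfs_iget(): both return conventions above must be handled, NULL for a stale or unreadable object and ERR_PTR(-ENOMEM) when iget5_locked() could not allocate. Mapping NULL to -ESTALE mirrors what reiserfs_get_dentry() below does:

	/* Hypothetical caller, e.g. an NFS or lookup path. */
	struct inode *inode = reiserfs_iget(sb, &key);
	if (IS_ERR(inode))
		return PTR_ERR(inode);	/* -ENOMEM from iget5_locked() */
	if (!inode)
		return -ESTALE;		/* bad inode or key mismatch */
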
1533 1533
1534 struct dentry *reiserfs_get_dentry(struct super_block *sb, void *vobjp) 1534 struct dentry *reiserfs_get_dentry(struct super_block *sb, void *vobjp)
1535 { 1535 {
1536 __u32 *data = vobjp; 1536 __u32 *data = vobjp;
1537 struct cpu_key key; 1537 struct cpu_key key;
1538 struct dentry *result; 1538 struct dentry *result;
1539 struct inode *inode; 1539 struct inode *inode;
1540 1540
1541 key.on_disk_key.k_objectid = data[0]; 1541 key.on_disk_key.k_objectid = data[0];
1542 key.on_disk_key.k_dir_id = data[1]; 1542 key.on_disk_key.k_dir_id = data[1];
1543 reiserfs_write_lock(sb); 1543 reiserfs_write_lock(sb);
1544 inode = reiserfs_iget(sb, &key); 1544 inode = reiserfs_iget(sb, &key);
1545 if (inode && !IS_ERR(inode) && data[2] != 0 && 1545 if (inode && !IS_ERR(inode) && data[2] != 0 &&
1546 data[2] != inode->i_generation) { 1546 data[2] != inode->i_generation) {
1547 iput(inode); 1547 iput(inode);
1548 inode = NULL; 1548 inode = NULL;
1549 } 1549 }
1550 reiserfs_write_unlock(sb); 1550 reiserfs_write_unlock(sb);
1551 if (!inode) 1551 if (!inode)
1552 inode = ERR_PTR(-ESTALE); 1552 inode = ERR_PTR(-ESTALE);
1553 if (IS_ERR(inode)) 1553 if (IS_ERR(inode))
1554 return ERR_PTR(PTR_ERR(inode)); 1554 return ERR_PTR(PTR_ERR(inode));
1555 result = d_alloc_anon(inode); 1555 result = d_alloc_anon(inode);
1556 if (!result) { 1556 if (!result) {
1557 iput(inode); 1557 iput(inode);
1558 return ERR_PTR(-ENOMEM); 1558 return ERR_PTR(-ENOMEM);
1559 } 1559 }
1560 return result; 1560 return result;
1561 } 1561 }
1562 1562
1563 struct dentry *reiserfs_decode_fh(struct super_block *sb, __u32 * data, 1563 struct dentry *reiserfs_decode_fh(struct super_block *sb, __u32 * data,
1564 int len, int fhtype, 1564 int len, int fhtype,
1565 int (*acceptable) (void *context, 1565 int (*acceptable) (void *context,
1566 struct dentry * de), 1566 struct dentry * de),
1567 void *context) 1567 void *context)
1568 { 1568 {
1569 __u32 obj[3], parent[3]; 1569 __u32 obj[3], parent[3];
1570 1570
1571 /* fhtype happens to reflect the number of u32s encoded. 1571 /* fhtype happens to reflect the number of u32s encoded.
1572 * due to a bug in earlier code, fhtype might indicate there 1572 * due to a bug in earlier code, fhtype might indicate there
1573 * are more u32s than actually fit. 1573 * are more u32s than actually fit.
1574 * so if fhtype seems to be more than len, reduce fhtype. 1574 * so if fhtype seems to be more than len, reduce fhtype.
1575 * Valid types are: 1575 * Valid types are:
1576 * 2 - objectid + dir_id - legacy support 1576 * 2 - objectid + dir_id - legacy support
1577 * 3 - objectid + dir_id + generation 1577 * 3 - objectid + dir_id + generation
1578 * 4 - objectid + dir_id + objectid and dirid of parent - legacy 1578 * 4 - objectid + dir_id + objectid and dirid of parent - legacy
1579 * 5 - objectid + dir_id + generation + objectid and dirid of parent 1579 * 5 - objectid + dir_id + generation + objectid and dirid of parent
1580 * 6 - as above plus generation of directory 1580 * 6 - as above plus generation of directory
1581 * 6 does not fit in NFSv2 handles 1581 * 6 does not fit in NFSv2 handles
1582 */ 1582 */
1583 if (fhtype > len) { 1583 if (fhtype > len) {
1584 if (fhtype != 6 || len != 5) 1584 if (fhtype != 6 || len != 5)
1585 reiserfs_warning(sb, 1585 reiserfs_warning(sb,
1586 "nfsd/reiserfs, fhtype=%d, len=%d - odd", 1586 "nfsd/reiserfs, fhtype=%d, len=%d - odd",
1587 fhtype, len); 1587 fhtype, len);
1588 fhtype = 5; 1588 fhtype = 5;
1589 } 1589 }
1590 1590
1591 obj[0] = data[0]; 1591 obj[0] = data[0];
1592 obj[1] = data[1]; 1592 obj[1] = data[1];
1593 if (fhtype == 3 || fhtype >= 5) 1593 if (fhtype == 3 || fhtype >= 5)
1594 obj[2] = data[2]; 1594 obj[2] = data[2];
1595 else 1595 else
1596 obj[2] = 0; /* generation number */ 1596 obj[2] = 0; /* generation number */
1597 1597
1598 if (fhtype >= 4) { 1598 if (fhtype >= 4) {
1599 parent[0] = data[fhtype >= 5 ? 3 : 2]; 1599 parent[0] = data[fhtype >= 5 ? 3 : 2];
1600 parent[1] = data[fhtype >= 5 ? 4 : 3]; 1600 parent[1] = data[fhtype >= 5 ? 4 : 3];
1601 if (fhtype == 6) 1601 if (fhtype == 6)
1602 parent[2] = data[5]; 1602 parent[2] = data[5];
1603 else 1603 else
1604 parent[2] = 0; 1604 parent[2] = 0;
1605 } 1605 }
1606 return sb->s_export_op->find_exported_dentry(sb, obj, 1606 return sb->s_export_op->find_exported_dentry(sb, obj,
1607 fhtype < 4 ? NULL : parent, 1607 fhtype < 4 ? NULL : parent,
1608 acceptable, context); 1608 acceptable, context);
1609 } 1609 }
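
The fhtype table in the comment above maps directly onto data[]; for example (all ids made up):

	/* fhtype == 3: objectid, dir_id, generation */
	__u32 fh3[3] = { 42, 2, 17 };
	/* fhtype == 5 adds the parent's objectid and dir_id; the code above
	 * splits it into obj = { 42, 2, 17 } and parent = { 40, 2, 0 } */
	__u32 fh5[5] = { 42, 2, 17, 40, 2 };
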
1610 1610
1611 int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp, 1611 int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp,
1612 int need_parent) 1612 int need_parent)
1613 { 1613 {
1614 struct inode *inode = dentry->d_inode; 1614 struct inode *inode = dentry->d_inode;
1615 int maxlen = *lenp; 1615 int maxlen = *lenp;
1616 1616
1617 if (maxlen < 3) 1617 if (maxlen < 3)
1618 return 255; 1618 return 255;
1619 1619
1620 data[0] = inode->i_ino; 1620 data[0] = inode->i_ino;
1621 data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); 1621 data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
1622 data[2] = inode->i_generation; 1622 data[2] = inode->i_generation;
1623 *lenp = 3; 1623 *lenp = 3;
1624 /* no room for directory info? return what we've stored so far */ 1624 /* no room for directory info? return what we've stored so far */
1625 if (maxlen < 5 || !need_parent) 1625 if (maxlen < 5 || !need_parent)
1626 return 3; 1626 return 3;
1627 1627
1628 spin_lock(&dentry->d_lock); 1628 spin_lock(&dentry->d_lock);
1629 inode = dentry->d_parent->d_inode; 1629 inode = dentry->d_parent->d_inode;
1630 data[3] = inode->i_ino; 1630 data[3] = inode->i_ino;
1631 data[4] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); 1631 data[4] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
1632 *lenp = 5; 1632 *lenp = 5;
1633 if (maxlen >= 6) { 1633 if (maxlen >= 6) {
1634 data[5] = inode->i_generation; 1634 data[5] = inode->i_generation;
1635 *lenp = 6; 1635 *lenp = 6;
1636 } 1636 }
1637 spin_unlock(&dentry->d_lock); 1637 spin_unlock(&dentry->d_lock);
1638 return *lenp; 1638 return *lenp;
1639 } 1639 }
1640 1640
1641 /* looks for stat data, then copies fields to it, marks the buffer 1641 /* looks for stat data, then copies fields to it, marks the buffer
1642 containing stat data as dirty */ 1642 containing stat data as dirty */
1643 /* reiserfs inodes are never really dirty, since the dirty inode call 1643 /* reiserfs inodes are never really dirty, since the dirty inode call
1644 ** always logs them. This call allows the VFS inode marking routines 1644 ** always logs them. This call allows the VFS inode marking routines
1645 ** to properly mark inodes for datasync and such, but only actually 1645 ** to properly mark inodes for datasync and such, but only actually
1646 ** does something when called for a synchronous update. 1646 ** does something when called for a synchronous update.
1647 */ 1647 */
1648 int reiserfs_write_inode(struct inode *inode, int do_sync) 1648 int reiserfs_write_inode(struct inode *inode, int do_sync)
1649 { 1649 {
1650 struct reiserfs_transaction_handle th; 1650 struct reiserfs_transaction_handle th;
1651 int jbegin_count = 1; 1651 int jbegin_count = 1;
1652 1652
1653 if (inode->i_sb->s_flags & MS_RDONLY) 1653 if (inode->i_sb->s_flags & MS_RDONLY)
1654 return -EROFS; 1654 return -EROFS;
1655 /* memory pressure can sometimes initiate write_inode calls with sync == 1; 1655 /* memory pressure can sometimes initiate write_inode calls with sync == 1;
1656 ** these cases are just when the system needs RAM, not when the 1656 ** these cases are just when the system needs RAM, not when the
1657 ** inode needs to reach disk for safety, and they can safely be 1657 ** inode needs to reach disk for safety, and they can safely be
1658 ** ignored because the altered inode has already been logged. 1658 ** ignored because the altered inode has already been logged.
1659 */ 1659 */
1660 if (do_sync && !(current->flags & PF_MEMALLOC)) { 1660 if (do_sync && !(current->flags & PF_MEMALLOC)) {
1661 reiserfs_write_lock(inode->i_sb); 1661 reiserfs_write_lock(inode->i_sb);
1662 if (!journal_begin(&th, inode->i_sb, jbegin_count)) { 1662 if (!journal_begin(&th, inode->i_sb, jbegin_count)) {
1663 reiserfs_update_sd(&th, inode); 1663 reiserfs_update_sd(&th, inode);
1664 journal_end_sync(&th, inode->i_sb, jbegin_count); 1664 journal_end_sync(&th, inode->i_sb, jbegin_count);
1665 } 1665 }
1666 reiserfs_write_unlock(inode->i_sb); 1666 reiserfs_write_unlock(inode->i_sb);
1667 } 1667 }
1668 return 0; 1668 return 0;
1669 } 1669 }
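
The guard in reiserfs_write_inode() is the same principle this commit applies to writepage: writeback driven purely by memory pressure (PF_MEMALLOC) must not start a transaction. Isolated into a hypothetical helper for clarity:

	static inline int sync_worth_a_transaction(int do_sync)
	{
		/* under PF_MEMALLOC the inode was already logged when it
		 * was dirtied, so a transaction here buys nothing */
		return do_sync && !(current->flags & PF_MEMALLOC);
	}
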
1670 1670
1671 /* stat data of new object is inserted already, this inserts the item 1671 /* stat data of new object is inserted already, this inserts the item
1672 containing "." and ".." entries */ 1672 containing "." and ".." entries */
1673 static int reiserfs_new_directory(struct reiserfs_transaction_handle *th, 1673 static int reiserfs_new_directory(struct reiserfs_transaction_handle *th,
1674 struct inode *inode, 1674 struct inode *inode,
1675 struct item_head *ih, struct path *path, 1675 struct item_head *ih, struct path *path,
1676 struct inode *dir) 1676 struct inode *dir)
1677 { 1677 {
1678 struct super_block *sb = th->t_super; 1678 struct super_block *sb = th->t_super;
1679 char empty_dir[EMPTY_DIR_SIZE]; 1679 char empty_dir[EMPTY_DIR_SIZE];
1680 char *body = empty_dir; 1680 char *body = empty_dir;
1681 struct cpu_key key; 1681 struct cpu_key key;
1682 int retval; 1682 int retval;
1683 1683
1684 BUG_ON(!th->t_trans_id); 1684 BUG_ON(!th->t_trans_id);
1685 1685
1686 _make_cpu_key(&key, KEY_FORMAT_3_5, le32_to_cpu(ih->ih_key.k_dir_id), 1686 _make_cpu_key(&key, KEY_FORMAT_3_5, le32_to_cpu(ih->ih_key.k_dir_id),
1687 le32_to_cpu(ih->ih_key.k_objectid), DOT_OFFSET, 1687 le32_to_cpu(ih->ih_key.k_objectid), DOT_OFFSET,
1688 TYPE_DIRENTRY, 3 /*key length */ ); 1688 TYPE_DIRENTRY, 3 /*key length */ );
1689 1689
1690 /* compose item head for new item. Directories consist of items of 1690 /* compose item head for new item. Directories consist of items of
1691 old type (ITEM_VERSION_1). Do not set the key (second arg is 0); that 1691 old type (ITEM_VERSION_1). Do not set the key (second arg is 0); that
1692 is done by reiserfs_new_inode */ 1692 is done by reiserfs_new_inode */
1693 if (old_format_only(sb)) { 1693 if (old_format_only(sb)) {
1694 make_le_item_head(ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET, 1694 make_le_item_head(ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET,
1695 TYPE_DIRENTRY, EMPTY_DIR_SIZE_V1, 2); 1695 TYPE_DIRENTRY, EMPTY_DIR_SIZE_V1, 2);
1696 1696
1697 make_empty_dir_item_v1(body, ih->ih_key.k_dir_id, 1697 make_empty_dir_item_v1(body, ih->ih_key.k_dir_id,
1698 ih->ih_key.k_objectid, 1698 ih->ih_key.k_objectid,
1699 INODE_PKEY(dir)->k_dir_id, 1699 INODE_PKEY(dir)->k_dir_id,
1700 INODE_PKEY(dir)->k_objectid); 1700 INODE_PKEY(dir)->k_objectid);
1701 } else { 1701 } else {
1702 make_le_item_head(ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET, 1702 make_le_item_head(ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET,
1703 TYPE_DIRENTRY, EMPTY_DIR_SIZE, 2); 1703 TYPE_DIRENTRY, EMPTY_DIR_SIZE, 2);
1704 1704
1705 make_empty_dir_item(body, ih->ih_key.k_dir_id, 1705 make_empty_dir_item(body, ih->ih_key.k_dir_id,
1706 ih->ih_key.k_objectid, 1706 ih->ih_key.k_objectid,
1707 INODE_PKEY(dir)->k_dir_id, 1707 INODE_PKEY(dir)->k_dir_id,
1708 INODE_PKEY(dir)->k_objectid); 1708 INODE_PKEY(dir)->k_objectid);
1709 } 1709 }
1710 1710
1711 /* look for place in the tree for new item */ 1711 /* look for place in the tree for new item */
1712 retval = search_item(sb, &key, path); 1712 retval = search_item(sb, &key, path);
1713 if (retval == IO_ERROR) { 1713 if (retval == IO_ERROR) {
1714 reiserfs_warning(sb, "vs-13080: reiserfs_new_directory: " 1714 reiserfs_warning(sb, "vs-13080: reiserfs_new_directory: "
1715 "i/o failure occurred creating new directory"); 1715 "i/o failure occurred creating new directory");
1716 return -EIO; 1716 return -EIO;
1717 } 1717 }
1718 if (retval == ITEM_FOUND) { 1718 if (retval == ITEM_FOUND) {
1719 pathrelse(path); 1719 pathrelse(path);
1720 reiserfs_warning(sb, "vs-13070: reiserfs_new_directory: " 1720 reiserfs_warning(sb, "vs-13070: reiserfs_new_directory: "
1721 "object with this key exists (%k)", 1721 "object with this key exists (%k)",
1722 &(ih->ih_key)); 1722 &(ih->ih_key));
1723 return -EEXIST; 1723 return -EEXIST;
1724 } 1724 }
1725 1725
1726 /* insert item, that is empty directory item */ 1726 /* insert item, that is empty directory item */
1727 return reiserfs_insert_item(th, path, &key, ih, inode, body); 1727 return reiserfs_insert_item(th, path, &key, ih, inode, body);
1728 } 1728 }
1729 1729
1730 /* stat data of object has been inserted, this inserts the item 1730 /* stat data of object has been inserted, this inserts the item
1731 containing the body of symlink */ 1731 containing the body of symlink */
1732 static int reiserfs_new_symlink(struct reiserfs_transaction_handle *th, struct inode *inode, /* Inode of symlink */ 1732 static int reiserfs_new_symlink(struct reiserfs_transaction_handle *th, struct inode *inode, /* Inode of symlink */
1733 struct item_head *ih, 1733 struct item_head *ih,
1734 struct path *path, const char *symname, 1734 struct path *path, const char *symname,
1735 int item_len) 1735 int item_len)
1736 { 1736 {
1737 struct super_block *sb = th->t_super; 1737 struct super_block *sb = th->t_super;
1738 struct cpu_key key; 1738 struct cpu_key key;
1739 int retval; 1739 int retval;
1740 1740
1741 BUG_ON(!th->t_trans_id); 1741 BUG_ON(!th->t_trans_id);
1742 1742
1743 _make_cpu_key(&key, KEY_FORMAT_3_5, 1743 _make_cpu_key(&key, KEY_FORMAT_3_5,
1744 le32_to_cpu(ih->ih_key.k_dir_id), 1744 le32_to_cpu(ih->ih_key.k_dir_id),
1745 le32_to_cpu(ih->ih_key.k_objectid), 1745 le32_to_cpu(ih->ih_key.k_objectid),
1746 1, TYPE_DIRECT, 3 /*key length */ ); 1746 1, TYPE_DIRECT, 3 /*key length */ );
1747 1747
1748 make_le_item_head(ih, NULL, KEY_FORMAT_3_5, 1, TYPE_DIRECT, item_len, 1748 make_le_item_head(ih, NULL, KEY_FORMAT_3_5, 1, TYPE_DIRECT, item_len,
1749 0 /*free_space */ ); 1749 0 /*free_space */ );
1750 1750
1751 /* look for place in the tree for new item */ 1751 /* look for place in the tree for new item */
1752 retval = search_item(sb, &key, path); 1752 retval = search_item(sb, &key, path);
1753 if (retval == IO_ERROR) { 1753 if (retval == IO_ERROR) {
1754 reiserfs_warning(sb, "vs-13080: reiserfs_new_symlink: " 1754 reiserfs_warning(sb, "vs-13080: reiserfs_new_symlink: "
1755 "i/o failure occurred creating new symlink"); 1755 "i/o failure occurred creating new symlink");
1756 return -EIO; 1756 return -EIO;
1757 } 1757 }
1758 if (retval == ITEM_FOUND) { 1758 if (retval == ITEM_FOUND) {
1759 pathrelse(path); 1759 pathrelse(path);
1760 reiserfs_warning(sb, "vs-13080: reiserfs_new_symlink: " 1760 reiserfs_warning(sb, "vs-13080: reiserfs_new_symlink: "
1761 "object with this key exists (%k)", 1761 "object with this key exists (%k)",
1762 &(ih->ih_key)); 1762 &(ih->ih_key));
1763 return -EEXIST; 1763 return -EEXIST;
1764 } 1764 }
1765 1765
1766 /* insert item, that is, the body of the symlink */ 1766 /* insert item, that is, the body of the symlink */
1767 return reiserfs_insert_item(th, path, &key, ih, inode, symname); 1767 return reiserfs_insert_item(th, path, &key, ih, inode, symname);
1768 } 1768 }
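
Note the offset of 1 in both the key and the item head above: the symlink body is stored as a direct item, and reiserfs file-body offsets start at 1. A small illustration (the example name is assumed, not from the source):

	/* symname = "target"
	 *
	 * key  = { k_dir_id, k_objectid, offset = 1, TYPE_DIRECT }
	 * item = direct item holding the body bytes starting at offset 1
	 *        (item_len is strlen("target") on old-format filesystems;
	 *        new-format rounds it up, see ROUND_UP() in
	 *        reiserfs_new_inode() below)
	 */
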
1769 1769
1770 /* inserts the stat data into the tree, and then calls 1770 /* inserts the stat data into the tree, and then calls
1771 reiserfs_new_directory (to insert ".", ".." item if new object is 1771 reiserfs_new_directory (to insert ".", ".." item if new object is
1772 directory) or reiserfs_new_symlink (to insert symlink body if new 1772 directory) or reiserfs_new_symlink (to insert symlink body if new
1773 object is symlink) or nothing (if new object is regular file) 1773 object is symlink) or nothing (if new object is regular file)
1774 1774
1775 NOTE! uid and gid must already be set in the inode. If we return 1775 NOTE! uid and gid must already be set in the inode. If we return
1776 non-zero due to an error, we have to drop the quota previously allocated 1776 non-zero due to an error, we have to drop the quota previously allocated
1777 for the fresh inode. This can only be done outside a transaction, so 1777 for the fresh inode. This can only be done outside a transaction, so
1778 if we return non-zero, we also end the transaction. */ 1778 if we return non-zero, we also end the transaction. */
1779 int reiserfs_new_inode(struct reiserfs_transaction_handle *th, 1779 int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1780 struct inode *dir, int mode, const char *symname, 1780 struct inode *dir, int mode, const char *symname,
1781 /* 0 for regular files, EMPTY_DIR_SIZE for dirs, 1781 /* 0 for regular files, EMPTY_DIR_SIZE for dirs,
1782 strlen(symname) for symlinks */ 1782 strlen(symname) for symlinks */
1783 loff_t i_size, struct dentry *dentry, 1783 loff_t i_size, struct dentry *dentry,
1784 struct inode *inode) 1784 struct inode *inode)
1785 { 1785 {
1786 struct super_block *sb; 1786 struct super_block *sb;
1787 INITIALIZE_PATH(path_to_key); 1787 INITIALIZE_PATH(path_to_key);
1788 struct cpu_key key; 1788 struct cpu_key key;
1789 struct item_head ih; 1789 struct item_head ih;
1790 struct stat_data sd; 1790 struct stat_data sd;
1791 int retval; 1791 int retval;
1792 int err; 1792 int err;
1793 1793
1794 BUG_ON(!th->t_trans_id); 1794 BUG_ON(!th->t_trans_id);
1795 1795
1796 if (DQUOT_ALLOC_INODE(inode)) { 1796 if (DQUOT_ALLOC_INODE(inode)) {
1797 err = -EDQUOT; 1797 err = -EDQUOT;
1798 goto out_end_trans; 1798 goto out_end_trans;
1799 } 1799 }
1800 if (!dir || !dir->i_nlink) { 1800 if (!dir || !dir->i_nlink) {
1801 err = -EPERM; 1801 err = -EPERM;
1802 goto out_bad_inode; 1802 goto out_bad_inode;
1803 } 1803 }
1804 1804
1805 sb = dir->i_sb; 1805 sb = dir->i_sb;
1806 1806
1807 /* item head of new item */ 1807 /* item head of new item */
1808 ih.ih_key.k_dir_id = reiserfs_choose_packing(dir); 1808 ih.ih_key.k_dir_id = reiserfs_choose_packing(dir);
1809 ih.ih_key.k_objectid = cpu_to_le32(reiserfs_get_unused_objectid(th)); 1809 ih.ih_key.k_objectid = cpu_to_le32(reiserfs_get_unused_objectid(th));
1810 if (!ih.ih_key.k_objectid) { 1810 if (!ih.ih_key.k_objectid) {
1811 err = -ENOMEM; 1811 err = -ENOMEM;
1812 goto out_bad_inode; 1812 goto out_bad_inode;
1813 } 1813 }
1814 if (old_format_only(sb)) 1814 if (old_format_only(sb))
1815 /* not a perfect generation count, as object ids can be reused, but 1815 /* not a perfect generation count, as object ids can be reused, but
1816 ** this is as good as reiserfs can do right now. 1816 ** this is as good as reiserfs can do right now.
1817 ** note that the private part of inode isn't filled in yet, we have 1817 ** note that the private part of inode isn't filled in yet, we have
1818 ** to use the directory. 1818 ** to use the directory.
1819 */ 1819 */
1820 inode->i_generation = le32_to_cpu(INODE_PKEY(dir)->k_objectid); 1820 inode->i_generation = le32_to_cpu(INODE_PKEY(dir)->k_objectid);
1821 else 1821 else
1822 #if defined( USE_INODE_GENERATION_COUNTER ) 1822 #if defined( USE_INODE_GENERATION_COUNTER )
1823 inode->i_generation = 1823 inode->i_generation =
1824 le32_to_cpu(REISERFS_SB(sb)->s_rs->s_inode_generation); 1824 le32_to_cpu(REISERFS_SB(sb)->s_rs->s_inode_generation);
1825 #else 1825 #else
1826 inode->i_generation = ++event; 1826 inode->i_generation = ++event;
1827 #endif 1827 #endif
1828 1828
1829 /* fill stat data */ 1829 /* fill stat data */
1830 inode->i_nlink = (S_ISDIR(mode) ? 2 : 1); 1830 inode->i_nlink = (S_ISDIR(mode) ? 2 : 1);
1831 1831
1832 /* uid and gid must already be set by the caller for quota init */ 1832 /* uid and gid must already be set by the caller for quota init */
1833 1833
1834 /* symlink cannot be immutable or append only, right? */ 1834 /* symlink cannot be immutable or append only, right? */
1835 if (S_ISLNK(inode->i_mode)) 1835 if (S_ISLNK(inode->i_mode))
1836 inode->i_flags &= ~(S_IMMUTABLE | S_APPEND); 1836 inode->i_flags &= ~(S_IMMUTABLE | S_APPEND);
1837 1837
1838 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; 1838 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
1839 inode->i_size = i_size; 1839 inode->i_size = i_size;
1840 inode->i_blocks = 0; 1840 inode->i_blocks = 0;
1841 inode->i_bytes = 0; 1841 inode->i_bytes = 0;
1842 REISERFS_I(inode)->i_first_direct_byte = S_ISLNK(mode) ? 1 : 1842 REISERFS_I(inode)->i_first_direct_byte = S_ISLNK(mode) ? 1 :
1843 U32_MAX /*NO_BYTES_IN_DIRECT_ITEM */ ; 1843 U32_MAX /*NO_BYTES_IN_DIRECT_ITEM */ ;
1844 1844
1845 INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list)); 1845 INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list));
1846 REISERFS_I(inode)->i_flags = 0; 1846 REISERFS_I(inode)->i_flags = 0;
1847 REISERFS_I(inode)->i_prealloc_block = 0; 1847 REISERFS_I(inode)->i_prealloc_block = 0;
1848 REISERFS_I(inode)->i_prealloc_count = 0; 1848 REISERFS_I(inode)->i_prealloc_count = 0;
1849 REISERFS_I(inode)->i_trans_id = 0; 1849 REISERFS_I(inode)->i_trans_id = 0;
1850 REISERFS_I(inode)->i_jl = NULL; 1850 REISERFS_I(inode)->i_jl = NULL;
1851 REISERFS_I(inode)->i_attrs = 1851 REISERFS_I(inode)->i_attrs =
1852 REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK; 1852 REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK;
1853 sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode); 1853 sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode);
1854 REISERFS_I(inode)->i_acl_access = NULL; 1854 REISERFS_I(inode)->i_acl_access = NULL;
1855 REISERFS_I(inode)->i_acl_default = NULL; 1855 REISERFS_I(inode)->i_acl_default = NULL;
1856 init_rwsem(&REISERFS_I(inode)->xattr_sem); 1856 init_rwsem(&REISERFS_I(inode)->xattr_sem);
1857 1857
1858 if (old_format_only(sb)) 1858 if (old_format_only(sb))
1859 make_le_item_head(&ih, NULL, KEY_FORMAT_3_5, SD_OFFSET, 1859 make_le_item_head(&ih, NULL, KEY_FORMAT_3_5, SD_OFFSET,
1860 TYPE_STAT_DATA, SD_V1_SIZE, MAX_US_INT); 1860 TYPE_STAT_DATA, SD_V1_SIZE, MAX_US_INT);
1861 else 1861 else
1862 make_le_item_head(&ih, NULL, KEY_FORMAT_3_6, SD_OFFSET, 1862 make_le_item_head(&ih, NULL, KEY_FORMAT_3_6, SD_OFFSET,
1863 TYPE_STAT_DATA, SD_SIZE, MAX_US_INT); 1863 TYPE_STAT_DATA, SD_SIZE, MAX_US_INT);
1864 1864
1865 /* key to search for correct place for new stat data */ 1865 /* key to search for correct place for new stat data */
1866 _make_cpu_key(&key, KEY_FORMAT_3_6, le32_to_cpu(ih.ih_key.k_dir_id), 1866 _make_cpu_key(&key, KEY_FORMAT_3_6, le32_to_cpu(ih.ih_key.k_dir_id),
1867 le32_to_cpu(ih.ih_key.k_objectid), SD_OFFSET, 1867 le32_to_cpu(ih.ih_key.k_objectid), SD_OFFSET,
1868 TYPE_STAT_DATA, 3 /*key length */ ); 1868 TYPE_STAT_DATA, 3 /*key length */ );
1869 1869
1870 /* find proper place for inserting of stat data */ 1870 /* find proper place for inserting of stat data */
1871 retval = search_item(sb, &key, &path_to_key); 1871 retval = search_item(sb, &key, &path_to_key);
1872 if (retval == IO_ERROR) { 1872 if (retval == IO_ERROR) {
1873 err = -EIO; 1873 err = -EIO;
1874 goto out_bad_inode; 1874 goto out_bad_inode;
1875 } 1875 }
1876 if (retval == ITEM_FOUND) { 1876 if (retval == ITEM_FOUND) {
1877 pathrelse(&path_to_key); 1877 pathrelse(&path_to_key);
1878 err = -EEXIST; 1878 err = -EEXIST;
1879 goto out_bad_inode; 1879 goto out_bad_inode;
1880 } 1880 }
1881 if (old_format_only(sb)) { 1881 if (old_format_only(sb)) {
1882 if (inode->i_uid & ~0xffff || inode->i_gid & ~0xffff) { 1882 if (inode->i_uid & ~0xffff || inode->i_gid & ~0xffff) {
1883 pathrelse(&path_to_key); 1883 pathrelse(&path_to_key);
1884 /* i_uid or i_gid is too big to be stored in stat data v3.5 */ 1884 /* i_uid or i_gid is too big to be stored in stat data v3.5 */
1885 err = -EINVAL; 1885 err = -EINVAL;
1886 goto out_bad_inode; 1886 goto out_bad_inode;
1887 } 1887 }
1888 inode2sd_v1(&sd, inode, inode->i_size); 1888 inode2sd_v1(&sd, inode, inode->i_size);
1889 } else { 1889 } else {
1890 inode2sd(&sd, inode, inode->i_size); 1890 inode2sd(&sd, inode, inode->i_size);
1891 } 1891 }
1892 // these do not go to on-disk stat data 1892 // these do not go to on-disk stat data
1893 inode->i_ino = le32_to_cpu(ih.ih_key.k_objectid); 1893 inode->i_ino = le32_to_cpu(ih.ih_key.k_objectid);
1894 inode->i_blksize = reiserfs_default_io_size; 1894 inode->i_blksize = reiserfs_default_io_size;
1895 1895
1896 // store in the in-core inode the key of the stat data and the version 1896 // store in the in-core inode the key of the stat data and the version
1897 // all object items will have (directory items will have the old offset 1897 // all object items will have (directory items will have the old offset
1898 // format, other new objects will consist of new items) 1898 // format, other new objects will consist of new items)
1899 memcpy(INODE_PKEY(inode), &(ih.ih_key), KEY_SIZE); 1899 memcpy(INODE_PKEY(inode), &(ih.ih_key), KEY_SIZE);
1900 if (old_format_only(sb) || S_ISDIR(mode) || S_ISLNK(mode)) 1900 if (old_format_only(sb) || S_ISDIR(mode) || S_ISLNK(mode))
1901 set_inode_item_key_version(inode, KEY_FORMAT_3_5); 1901 set_inode_item_key_version(inode, KEY_FORMAT_3_5);
1902 else 1902 else
1903 set_inode_item_key_version(inode, KEY_FORMAT_3_6); 1903 set_inode_item_key_version(inode, KEY_FORMAT_3_6);
1904 if (old_format_only(sb)) 1904 if (old_format_only(sb))
1905 set_inode_sd_version(inode, STAT_DATA_V1); 1905 set_inode_sd_version(inode, STAT_DATA_V1);
1906 else 1906 else
1907 set_inode_sd_version(inode, STAT_DATA_V2); 1907 set_inode_sd_version(inode, STAT_DATA_V2);
1908 1908
1909 /* insert the stat data into the tree */ 1909 /* insert the stat data into the tree */
1910 #ifdef DISPLACE_NEW_PACKING_LOCALITIES 1910 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
1911 if (REISERFS_I(dir)->new_packing_locality) 1911 if (REISERFS_I(dir)->new_packing_locality)
1912 th->displace_new_blocks = 1; 1912 th->displace_new_blocks = 1;
1913 #endif 1913 #endif
1914 retval = 1914 retval =
1915 reiserfs_insert_item(th, &path_to_key, &key, &ih, inode, 1915 reiserfs_insert_item(th, &path_to_key, &key, &ih, inode,
1916 (char *)(&sd)); 1916 (char *)(&sd));
1917 if (retval) { 1917 if (retval) {
1918 err = retval; 1918 err = retval;
1919 reiserfs_check_path(&path_to_key); 1919 reiserfs_check_path(&path_to_key);
1920 goto out_bad_inode; 1920 goto out_bad_inode;
1921 } 1921 }
1922 #ifdef DISPLACE_NEW_PACKING_LOCALITIES 1922 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
1923 if (!th->displace_new_blocks) 1923 if (!th->displace_new_blocks)
1924 REISERFS_I(dir)->new_packing_locality = 0; 1924 REISERFS_I(dir)->new_packing_locality = 0;
1925 #endif 1925 #endif
1926 if (S_ISDIR(mode)) { 1926 if (S_ISDIR(mode)) {
1927 /* insert item with "." and ".." */ 1927 /* insert item with "." and ".." */
1928 retval = 1928 retval =
1929 reiserfs_new_directory(th, inode, &ih, &path_to_key, dir); 1929 reiserfs_new_directory(th, inode, &ih, &path_to_key, dir);
1930 } 1930 }
1931 1931
1932 if (S_ISLNK(mode)) { 1932 if (S_ISLNK(mode)) {
1933 /* insert body of symlink */ 1933 /* insert body of symlink */
1934 if (!old_format_only(sb)) 1934 if (!old_format_only(sb))
1935 i_size = ROUND_UP(i_size); 1935 i_size = ROUND_UP(i_size);
1936 retval = 1936 retval =
1937 reiserfs_new_symlink(th, inode, &ih, &path_to_key, symname, 1937 reiserfs_new_symlink(th, inode, &ih, &path_to_key, symname,
1938 i_size); 1938 i_size);
1939 } 1939 }
1940 if (retval) { 1940 if (retval) {
1941 err = retval; 1941 err = retval;
1942 reiserfs_check_path(&path_to_key); 1942 reiserfs_check_path(&path_to_key);
1943 journal_end(th, th->t_super, th->t_blocks_allocated); 1943 journal_end(th, th->t_super, th->t_blocks_allocated);
1944 goto out_inserted_sd; 1944 goto out_inserted_sd;
1945 } 1945 }
1946 1946
1947 /* XXX CHECK THIS */ 1947 /* XXX CHECK THIS */
1948 if (reiserfs_posixacl(inode->i_sb)) { 1948 if (reiserfs_posixacl(inode->i_sb)) {
1949 retval = reiserfs_inherit_default_acl(dir, dentry, inode); 1949 retval = reiserfs_inherit_default_acl(dir, dentry, inode);
1950 if (retval) { 1950 if (retval) {
1951 err = retval; 1951 err = retval;
1952 reiserfs_check_path(&path_to_key); 1952 reiserfs_check_path(&path_to_key);
1953 journal_end(th, th->t_super, th->t_blocks_allocated); 1953 journal_end(th, th->t_super, th->t_blocks_allocated);
1954 goto out_inserted_sd; 1954 goto out_inserted_sd;
1955 } 1955 }
1956 } else if (inode->i_sb->s_flags & MS_POSIXACL) { 1956 } else if (inode->i_sb->s_flags & MS_POSIXACL) {
1957 reiserfs_warning(inode->i_sb, "ACLs aren't enabled in the fs, " 1957 reiserfs_warning(inode->i_sb, "ACLs aren't enabled in the fs, "
1958 "but vfs thinks they are!"); 1958 "but vfs thinks they are!");
1959 } else if (is_reiserfs_priv_object(dir)) { 1959 } else if (is_reiserfs_priv_object(dir)) {
1960 reiserfs_mark_inode_private(inode); 1960 reiserfs_mark_inode_private(inode);
1961 } 1961 }
1962 1962
1963 insert_inode_hash(inode); 1963 insert_inode_hash(inode);
1964 reiserfs_update_sd(th, inode); 1964 reiserfs_update_sd(th, inode);
1965 reiserfs_check_path(&path_to_key); 1965 reiserfs_check_path(&path_to_key);
1966 1966
1967 return 0; 1967 return 0;
1968 1968
1969 /* it looks like you can easily compress these two goto targets into 1969 /* it looks like you can easily compress these two goto targets into
1970 * one. Keeping it like this doesn't actually hurt anything, and they 1970 * one. Keeping it like this doesn't actually hurt anything, and they
1971 * are placeholders for what the quota code actually needs. 1971 * are placeholders for what the quota code actually needs.
1972 */ 1972 */
1973 out_bad_inode: 1973 out_bad_inode:
1974 /* Invalidate the object, nothing was inserted yet */ 1974 /* Invalidate the object, nothing was inserted yet */
1975 INODE_PKEY(inode)->k_objectid = 0; 1975 INODE_PKEY(inode)->k_objectid = 0;
1976 1976
1977 /* Quota change must be inside a transaction for journaling */ 1977 /* Quota change must be inside a transaction for journaling */
1978 DQUOT_FREE_INODE(inode); 1978 DQUOT_FREE_INODE(inode);
1979 1979
1980 out_end_trans: 1980 out_end_trans:
1981 journal_end(th, th->t_super, th->t_blocks_allocated); 1981 journal_end(th, th->t_super, th->t_blocks_allocated);
1982 /* The quota drop can happen outside the transaction, and it needs more credits, so it is better to do it outside */ 1982 /* The quota drop can happen outside the transaction, and it needs more credits, so it is better to do it outside */
1983 DQUOT_DROP(inode); 1983 DQUOT_DROP(inode);
1984 inode->i_flags |= S_NOQUOTA; 1984 inode->i_flags |= S_NOQUOTA;
1985 make_bad_inode(inode); 1985 make_bad_inode(inode);
1986 1986
1987 out_inserted_sd: 1987 out_inserted_sd:
1988 inode->i_nlink = 0; 1988 inode->i_nlink = 0;
1989 th->t_trans_id = 0; /* so the caller can't use this handle later */ 1989 th->t_trans_id = 0; /* so the caller can't use this handle later */
1990 1990
1991 /* If we were inheriting an ACL, we need to release the lock so that 1991 /* If we were inheriting an ACL, we need to release the lock so that
1992 * iput doesn't deadlock in reiserfs_delete_xattrs. The locking 1992 * iput doesn't deadlock in reiserfs_delete_xattrs. The locking
1993 * code really needs to be reworked, but this will take care of it 1993 * code really needs to be reworked, but this will take care of it
1994 * for now. -jeffm */ 1994 * for now. -jeffm */
1995 if (REISERFS_I(dir)->i_acl_default && !IS_ERR(REISERFS_I(dir)->i_acl_default)) { 1995 if (REISERFS_I(dir)->i_acl_default && !IS_ERR(REISERFS_I(dir)->i_acl_default)) {
1996 reiserfs_write_unlock_xattrs(dir->i_sb); 1996 reiserfs_write_unlock_xattrs(dir->i_sb);
1997 iput(inode); 1997 iput(inode);
1998 reiserfs_write_lock_xattrs(dir->i_sb); 1998 reiserfs_write_lock_xattrs(dir->i_sb);
1999 } else 1999 } else
2000 iput(inode); 2000 iput(inode);
2001 return err; 2001 return err;
2002 } 2002 }
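
That error contract matters to callers: on a nonzero return, reiserfs_new_inode() has already ended the transaction, dropped the quota, and zeroed th->t_trans_id, so the caller must not touch the handle again. A hedged caller-side sketch (jbegin_count and the labels are illustrative, not lifted from reiserfs_create()):

	struct reiserfs_transaction_handle th;
	int err;

	err = journal_begin(&th, dir->i_sb, jbegin_count);
	if (err)
		goto out;	/* no handle to clean up yet */

	err = reiserfs_new_inode(&th, dir, mode, NULL /* symname */,
				 0 /* i_size of a regular file */,
				 dentry, inode);
	if (err)
		goto out;	/* transaction already ended, quota dropped,
				 * and the half-built inode already iput() */

	/* ... link the inode into the directory, then ... */
	err = journal_end(&th, dir->i_sb, jbegin_count);
	out:
	return err;
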
2003 2003
2004 /* 2004 /*
2005 ** finds the tail page in the page cache, 2005 ** finds the tail page in the page cache,
2006 ** reads the last block in. 2006 ** reads the last block in.
2007 ** 2007 **
2008 ** On success, page_result is set to a locked, pinned page, and bh_result 2008 ** On success, page_result is set to a locked, pinned page, and bh_result
2009 ** is set to an up to date buffer for the last block in the file. returns 0. 2009 ** is set to an up to date buffer for the last block in the file. returns 0.
2010 ** 2010 **
2011 ** tail conversion is not done, so bh_result might not be valid for writing; 2011 ** tail conversion is not done, so bh_result might not be valid for writing;
2012 ** check buffer_mapped(bh_result) and bh_result->b_blocknr != 0 before 2012 ** check buffer_mapped(bh_result) and bh_result->b_blocknr != 0 before
2013 ** trying to write the block. 2013 ** trying to write the block.
2014 ** 2014 **
2015 ** on failure, nonzero is returned, page_result and bh_result are untouched. 2015 ** on failure, nonzero is returned, page_result and bh_result are untouched.
2016 */ 2016 */
2017 static int grab_tail_page(struct inode *p_s_inode, 2017 static int grab_tail_page(struct inode *p_s_inode,
2018 struct page **page_result, 2018 struct page **page_result,
2019 struct buffer_head **bh_result) 2019 struct buffer_head **bh_result)
2020 { 2020 {
2021 2021
2022 /* we want the page with the last byte in the file, 2022 /* we want the page with the last byte in the file,
2023 ** not the page that will hold the next byte for appending 2023 ** not the page that will hold the next byte for appending
2024 */ 2024 */
2025 unsigned long index = (p_s_inode->i_size - 1) >> PAGE_CACHE_SHIFT; 2025 unsigned long index = (p_s_inode->i_size - 1) >> PAGE_CACHE_SHIFT;
2026 unsigned long pos = 0; 2026 unsigned long pos = 0;
2027 unsigned long start = 0; 2027 unsigned long start = 0;
2028 unsigned long blocksize = p_s_inode->i_sb->s_blocksize; 2028 unsigned long blocksize = p_s_inode->i_sb->s_blocksize;
2029 unsigned long offset = (p_s_inode->i_size) & (PAGE_CACHE_SIZE - 1); 2029 unsigned long offset = (p_s_inode->i_size) & (PAGE_CACHE_SIZE - 1);
2030 struct buffer_head *bh; 2030 struct buffer_head *bh;
2031 struct buffer_head *head; 2031 struct buffer_head *head;
2032 struct page *page; 2032 struct page *page;
2033 int error; 2033 int error;
2034 2034
2035 /* we know that we are only called with inode->i_size > 0. 2035 /* we know that we are only called with inode->i_size > 0.
2036 ** we also know that a file tail can never be as big as a block 2036 ** we also know that a file tail can never be as big as a block
2037 ** If i_size % blocksize == 0, our file is currently block aligned 2037 ** If i_size % blocksize == 0, our file is currently block aligned
2038 ** and it won't need converting or zeroing after a truncate. 2038 ** and it won't need converting or zeroing after a truncate.
2039 */ 2039 */
2040 if ((offset & (blocksize - 1)) == 0) { 2040 if ((offset & (blocksize - 1)) == 0) {
2041 return -ENOENT; 2041 return -ENOENT;
2042 } 2042 }
2043 page = grab_cache_page(p_s_inode->i_mapping, index); 2043 page = grab_cache_page(p_s_inode->i_mapping, index);
2044 error = -ENOMEM; 2044 error = -ENOMEM;
2045 if (!page) { 2045 if (!page) {
2046 goto out; 2046 goto out;
2047 } 2047 }
2048 /* start within the page of the last block in the file */ 2048 /* start within the page of the last block in the file */
2049 start = (offset / blocksize) * blocksize; 2049 start = (offset / blocksize) * blocksize;
2050 2050
2051 error = block_prepare_write(page, start, offset, 2051 error = block_prepare_write(page, start, offset,
2052 reiserfs_get_block_create_0); 2052 reiserfs_get_block_create_0);
2053 if (error) 2053 if (error)
2054 goto unlock; 2054 goto unlock;
2055 2055
2056 head = page_buffers(page); 2056 head = page_buffers(page);
2057 bh = head; 2057 bh = head;
2058 do { 2058 do {
2059 if (pos >= start) { 2059 if (pos >= start) {
2060 break; 2060 break;
2061 } 2061 }
2062 bh = bh->b_this_page; 2062 bh = bh->b_this_page;
2063 pos += blocksize; 2063 pos += blocksize;
2064 } while (bh != head); 2064 } while (bh != head);
2065 2065
2066 if (!buffer_uptodate(bh)) { 2066 if (!buffer_uptodate(bh)) {
2067 /* note, this should never happen, prepare_write should 2067 /* note, this should never happen, prepare_write should
2068 ** be taking care of this for us. If the buffer isn't up to date, 2068 ** be taking care of this for us. If the buffer isn't up to date,
2069 ** I've screwed up the code to find the buffer, or the code to 2069 ** I've screwed up the code to find the buffer, or the code to
2070 ** call prepare_write 2070 ** call prepare_write
2071 */ 2071 */
2072 reiserfs_warning(p_s_inode->i_sb, 2072 reiserfs_warning(p_s_inode->i_sb,
2073 "clm-6000: error reading block %lu on dev %s", 2073 "clm-6000: error reading block %lu on dev %s",
2074 bh->b_blocknr, 2074 bh->b_blocknr,
2075 reiserfs_bdevname(p_s_inode->i_sb)); 2075 reiserfs_bdevname(p_s_inode->i_sb));
2076 error = -EIO; 2076 error = -EIO;
2077 goto unlock; 2077 goto unlock;
2078 } 2078 }
2079 *bh_result = bh; 2079 *bh_result = bh;
2080 *page_result = page; 2080 *page_result = page;
2081 2081
2082 out: 2082 out:
2083 return error; 2083 return error;
2084 2084
2085 unlock: 2085 unlock:
2086 unlock_page(page); 2086 unlock_page(page);
2087 page_cache_release(page); 2087 page_cache_release(page);
2088 return error; 2088 return error;
2089 } 2089 }
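
As the header comment warns, grab_tail_page() does no tail conversion, so success only guarantees an up-to-date buffer, not a writable block. A minimal caller-side sketch of the check it asks for (variable names are illustrative; the truncate path below does exactly this):

	struct page *page;
	struct buffer_head *bh;
	int err = grab_tail_page(inode, &page, &bh);

	if (err)
		return err;	/* page_result and bh_result were untouched */

	/* only a mapped buffer with a real block number is an unformatted
	 * node that may be written; otherwise the tail still lives in a
	 * direct item in the tree */
	if (buffer_mapped(bh) && bh->b_blocknr != 0)
		mark_buffer_dirty(bh);

	unlock_page(page);		/* grab_tail_page returned it locked */
	page_cache_release(page);	/* ... and pinned */
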
2090 2090
2091 /* 2091 /*
2092 ** vfs version of truncate file. Must NOT be called with 2092 ** vfs version of truncate file. Must NOT be called with
2093 ** a transaction already started. 2093 ** a transaction already started.
2094 ** 2094 **
2095 ** some code taken from block_truncate_page 2095 ** some code taken from block_truncate_page
2096 */ 2096 */
2097 int reiserfs_truncate_file(struct inode *p_s_inode, int update_timestamps) 2097 int reiserfs_truncate_file(struct inode *p_s_inode, int update_timestamps)
2098 { 2098 {
2099 struct reiserfs_transaction_handle th; 2099 struct reiserfs_transaction_handle th;
2100 /* we want the offset for the first byte after the end of the file */ 2100 /* we want the offset for the first byte after the end of the file */
2101 unsigned long offset = p_s_inode->i_size & (PAGE_CACHE_SIZE - 1); 2101 unsigned long offset = p_s_inode->i_size & (PAGE_CACHE_SIZE - 1);
2102 unsigned blocksize = p_s_inode->i_sb->s_blocksize; 2102 unsigned blocksize = p_s_inode->i_sb->s_blocksize;
2103 unsigned length; 2103 unsigned length;
2104 struct page *page = NULL; 2104 struct page *page = NULL;
2105 int error; 2105 int error;
2106 struct buffer_head *bh = NULL; 2106 struct buffer_head *bh = NULL;
2107 int err2; 2107 int err2;
2108 2108
2109 reiserfs_write_lock(p_s_inode->i_sb); 2109 reiserfs_write_lock(p_s_inode->i_sb);
2110 2110
2111 if (p_s_inode->i_size > 0) { 2111 if (p_s_inode->i_size > 0) {
2112 if ((error = grab_tail_page(p_s_inode, &page, &bh))) { 2112 if ((error = grab_tail_page(p_s_inode, &page, &bh))) {
2113 // -ENOENT means we truncated past the end of the file, 2113 // -ENOENT means we truncated past the end of the file,
2114 // and get_block_create_0 could not find a block to read in, 2114 // and get_block_create_0 could not find a block to read in,
2115 // which is ok. 2115 // which is ok.
2116 if (error != -ENOENT) 2116 if (error != -ENOENT)
2117 reiserfs_warning(p_s_inode->i_sb, 2117 reiserfs_warning(p_s_inode->i_sb,
2118 "clm-6001: grab_tail_page failed %d", 2118 "clm-6001: grab_tail_page failed %d",
2119 error); 2119 error);
2120 page = NULL; 2120 page = NULL;
2121 bh = NULL; 2121 bh = NULL;
2122 } 2122 }
2123 } 2123 }
2124 2124
2125 /* so, if page != NULL, we have a buffer head for the offset at 2125 /* so, if page != NULL, we have a buffer head for the offset at
2126 ** the end of the file. if the bh is mapped, and bh->b_blocknr != 0, 2126 ** the end of the file. if the bh is mapped, and bh->b_blocknr != 0,
2127 ** then we have an unformatted node. Otherwise, we have a direct item, 2127 ** then we have an unformatted node. Otherwise, we have a direct item,
2128 ** and no zeroing is required on disk. We zero after the truncate, 2128 ** and no zeroing is required on disk. We zero after the truncate,
2129 ** because the truncate might pack the item anyway 2129 ** because the truncate might pack the item anyway
2130 ** (it will unmap bh if it packs). 2130 ** (it will unmap bh if it packs).
2131 */ 2131 */
2132 /* it is enough to reserve space in transaction for 2 balancings: 2132 /* it is enough to reserve space in transaction for 2 balancings:
2133 one for "save" link adding and another for the first 2133 one for "save" link adding and another for the first
2134 cut_from_item. 1 is for update_sd */ 2134 cut_from_item. 1 is for update_sd */
2135 error = journal_begin(&th, p_s_inode->i_sb, 2135 error = journal_begin(&th, p_s_inode->i_sb,
2136 JOURNAL_PER_BALANCE_CNT * 2 + 1); 2136 JOURNAL_PER_BALANCE_CNT * 2 + 1);
2137 if (error) 2137 if (error)
2138 goto out; 2138 goto out;
2139 reiserfs_update_inode_transaction(p_s_inode); 2139 reiserfs_update_inode_transaction(p_s_inode);
2140 if (update_timestamps) 2140 if (update_timestamps)
2141 /* we are doing real truncate: if the system crashes before the last 2141 /* we are doing real truncate: if the system crashes before the last
2142 transaction of truncating gets committed - on reboot the file 2142 transaction of truncating gets committed - on reboot the file
2143 either appears truncated properly or not truncated at all */ 2143 either appears truncated properly or not truncated at all */
2144 add_save_link(&th, p_s_inode, 1); 2144 add_save_link(&th, p_s_inode, 1);
2145 err2 = reiserfs_do_truncate(&th, p_s_inode, page, update_timestamps); 2145 err2 = reiserfs_do_truncate(&th, p_s_inode, page, update_timestamps);
2146 error = 2146 error =
2147 journal_end(&th, p_s_inode->i_sb, JOURNAL_PER_BALANCE_CNT * 2 + 1); 2147 journal_end(&th, p_s_inode->i_sb, JOURNAL_PER_BALANCE_CNT * 2 + 1);
2148 if (error) 2148 if (error)
2149 goto out; 2149 goto out;
2150 2150
2151 /* check reiserfs_do_truncate after ending the transaction */ 2151 /* check reiserfs_do_truncate after ending the transaction */
2152 if (err2) { 2152 if (err2) {
2153 error = err2; 2153 error = err2;
2154 goto out; 2154 goto out;
2155 } 2155 }
2156 2156
2157 if (update_timestamps) { 2157 if (update_timestamps) {
2158 error = remove_save_link(p_s_inode, 1 /* truncate */ ); 2158 error = remove_save_link(p_s_inode, 1 /* truncate */ );
2159 if (error) 2159 if (error)
2160 goto out; 2160 goto out;
2161 } 2161 }
2162 2162
2163 if (page) { 2163 if (page) {
2164 length = offset & (blocksize - 1); 2164 length = offset & (blocksize - 1);
2165 /* if we are not on a block boundary */ 2165 /* if we are not on a block boundary */
2166 if (length) { 2166 if (length) {
2167 char *kaddr; 2167 char *kaddr;
2168 2168
2169 length = blocksize - length; 2169 length = blocksize - length;
2170 kaddr = kmap_atomic(page, KM_USER0); 2170 kaddr = kmap_atomic(page, KM_USER0);
2171 memset(kaddr + offset, 0, length); 2171 memset(kaddr + offset, 0, length);
2172 flush_dcache_page(page); 2172 flush_dcache_page(page);
2173 kunmap_atomic(kaddr, KM_USER0); 2173 kunmap_atomic(kaddr, KM_USER0);
2174 if (buffer_mapped(bh) && bh->b_blocknr != 0) { 2174 if (buffer_mapped(bh) && bh->b_blocknr != 0) {
2175 mark_buffer_dirty(bh); 2175 mark_buffer_dirty(bh);
2176 } 2176 }
2177 } 2177 }
2178 unlock_page(page); 2178 unlock_page(page);
2179 page_cache_release(page); 2179 page_cache_release(page);
2180 } 2180 }
2181 2181
2182 reiserfs_write_unlock(p_s_inode->i_sb); 2182 reiserfs_write_unlock(p_s_inode->i_sb);
2183 return 0; 2183 return 0;
2184 out: 2184 out:
2185 if (page) { 2185 if (page) {
2186 unlock_page(page); 2186 unlock_page(page);
2187 page_cache_release(page); 2187 page_cache_release(page);
2188 } 2188 }
2189 reiserfs_write_unlock(p_s_inode->i_sb); 2189 reiserfs_write_unlock(p_s_inode->i_sb);
2190 return error; 2190 return error;
2191 } 2191 }
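
The partial-block zeroing at the end of reiserfs_truncate_file() is easier to follow with numbers. A worked example, assuming a 4096-byte page and a 1024-byte block size:

	/* i_size = 5500, PAGE_CACHE_SIZE = 4096, blocksize = 1024
	 *
	 * offset = 5500 & 4095 = 1404   first byte past EOF, within its page
	 * length = 1404 & 1023 = 380    EOF sits 380 bytes into its block
	 * length = 1024 - 380  = 644    bytes left in that block to zero
	 *
	 * memset(kaddr + 1404, 0, 644) clears the rest of the EOF block so
	 * stale data cannot reappear if the file is later extended.
	 */
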
2192 2192
2193 static int map_block_for_writepage(struct inode *inode, 2193 static int map_block_for_writepage(struct inode *inode,
2194 struct buffer_head *bh_result, 2194 struct buffer_head *bh_result,
2195 unsigned long block) 2195 unsigned long block)
2196 { 2196 {
2197 struct reiserfs_transaction_handle th; 2197 struct reiserfs_transaction_handle th;
2198 int fs_gen; 2198 int fs_gen;
2199 struct item_head tmp_ih; 2199 struct item_head tmp_ih;
2200 struct item_head *ih; 2200 struct item_head *ih;
2201 struct buffer_head *bh; 2201 struct buffer_head *bh;
2202 __le32 *item; 2202 __le32 *item;
2203 struct cpu_key key; 2203 struct cpu_key key;
2204 INITIALIZE_PATH(path); 2204 INITIALIZE_PATH(path);
2205 int pos_in_item; 2205 int pos_in_item;
2206 int jbegin_count = JOURNAL_PER_BALANCE_CNT; 2206 int jbegin_count = JOURNAL_PER_BALANCE_CNT;
2207 loff_t byte_offset = ((loff_t)block << inode->i_sb->s_blocksize_bits)+1; 2207 loff_t byte_offset = ((loff_t)block << inode->i_sb->s_blocksize_bits)+1;
2208 int retval; 2208 int retval;
2209 int use_get_block = 0; 2209 int use_get_block = 0;
2210 int bytes_copied = 0; 2210 int bytes_copied = 0;
2211 int copy_size; 2211 int copy_size;
2212 int trans_running = 0; 2212 int trans_running = 0;
2213 2213
2214 /* catch places below that try to log something without starting a trans */ 2214 /* catch places below that try to log something without starting a trans */
2215 th.t_trans_id = 0; 2215 th.t_trans_id = 0;
2216 2216
2217 if (!buffer_uptodate(bh_result)) { 2217 if (!buffer_uptodate(bh_result)) {
2218 return -EIO; 2218 return -EIO;
2219 } 2219 }
2220 2220
2221 kmap(bh_result->b_page); 2221 kmap(bh_result->b_page);
2222 start_over: 2222 start_over:
2223 reiserfs_write_lock(inode->i_sb); 2223 reiserfs_write_lock(inode->i_sb);
2224 make_cpu_key(&key, inode, byte_offset, TYPE_ANY, 3); 2224 make_cpu_key(&key, inode, byte_offset, TYPE_ANY, 3);
2225 2225
2226 research: 2226 research:
2227 retval = search_for_position_by_key(inode->i_sb, &key, &path); 2227 retval = search_for_position_by_key(inode->i_sb, &key, &path);
2228 if (retval != POSITION_FOUND) { 2228 if (retval != POSITION_FOUND) {
2229 use_get_block = 1; 2229 use_get_block = 1;
2230 goto out; 2230 goto out;
2231 } 2231 }
2232 2232
2233 bh = get_last_bh(&path); 2233 bh = get_last_bh(&path);
2234 ih = get_ih(&path); 2234 ih = get_ih(&path);
2235 item = get_item(&path); 2235 item = get_item(&path);
2236 pos_in_item = path.pos_in_item; 2236 pos_in_item = path.pos_in_item;
2237 2237
2238 /* we've found an unformatted node */ 2238 /* we've found an unformatted node */
2239 if (indirect_item_found(retval, ih)) { 2239 if (indirect_item_found(retval, ih)) {
2240 if (bytes_copied > 0) { 2240 if (bytes_copied > 0) {
2241 reiserfs_warning(inode->i_sb, 2241 reiserfs_warning(inode->i_sb,
2242 "clm-6002: bytes_copied %d", 2242 "clm-6002: bytes_copied %d",
2243 bytes_copied); 2243 bytes_copied);
2244 } 2244 }
2245 if (!get_block_num(item, pos_in_item)) { 2245 if (!get_block_num(item, pos_in_item)) {
2246 /* crap, we are writing to a hole */ 2246 /* crap, we are writing to a hole */
2247 use_get_block = 1; 2247 use_get_block = 1;
2248 goto out; 2248 goto out;
2249 } 2249 }
2250 set_block_dev_mapped(bh_result, 2250 set_block_dev_mapped(bh_result,
2251 get_block_num(item, pos_in_item), inode); 2251 get_block_num(item, pos_in_item), inode);
2252 } else if (is_direct_le_ih(ih)) { 2252 } else if (is_direct_le_ih(ih)) {
2253 char *p; 2253 char *p;
2254 p = page_address(bh_result->b_page); 2254 p = page_address(bh_result->b_page);
2255 p += (byte_offset - 1) & (PAGE_CACHE_SIZE - 1); 2255 p += (byte_offset - 1) & (PAGE_CACHE_SIZE - 1);
2256 copy_size = ih_item_len(ih) - pos_in_item; 2256 copy_size = ih_item_len(ih) - pos_in_item;
2257 2257
2258 fs_gen = get_generation(inode->i_sb); 2258 fs_gen = get_generation(inode->i_sb);
2259 copy_item_head(&tmp_ih, ih); 2259 copy_item_head(&tmp_ih, ih);
2260 2260
2261 if (!trans_running) { 2261 if (!trans_running) {
2262 /* vs-3050 is gone, no need to drop the path */ 2262 /* vs-3050 is gone, no need to drop the path */
2263 retval = journal_begin(&th, inode->i_sb, jbegin_count); 2263 retval = journal_begin(&th, inode->i_sb, jbegin_count);
2264 if (retval) 2264 if (retval)
2265 goto out; 2265 goto out;
2266 reiserfs_update_inode_transaction(inode); 2266 reiserfs_update_inode_transaction(inode);
2267 trans_running = 1; 2267 trans_running = 1;
2268 if (fs_changed(fs_gen, inode->i_sb) 2268 if (fs_changed(fs_gen, inode->i_sb)
2269 && item_moved(&tmp_ih, &path)) { 2269 && item_moved(&tmp_ih, &path)) {
2270 reiserfs_restore_prepared_buffer(inode->i_sb, 2270 reiserfs_restore_prepared_buffer(inode->i_sb,
2271 bh); 2271 bh);
2272 goto research; 2272 goto research;
2273 } 2273 }
2274 } 2274 }
2275 2275
2276 reiserfs_prepare_for_journal(inode->i_sb, bh, 1); 2276 reiserfs_prepare_for_journal(inode->i_sb, bh, 1);
2277 2277
2278 if (fs_changed(fs_gen, inode->i_sb) 2278 if (fs_changed(fs_gen, inode->i_sb)
2279 && item_moved(&tmp_ih, &path)) { 2279 && item_moved(&tmp_ih, &path)) {
2280 reiserfs_restore_prepared_buffer(inode->i_sb, bh); 2280 reiserfs_restore_prepared_buffer(inode->i_sb, bh);
2281 goto research; 2281 goto research;
2282 } 2282 }
2283 2283
2284 memcpy(B_I_PITEM(bh, ih) + pos_in_item, p + bytes_copied, 2284 memcpy(B_I_PITEM(bh, ih) + pos_in_item, p + bytes_copied,
2285 copy_size); 2285 copy_size);
2286 2286
2287 journal_mark_dirty(&th, inode->i_sb, bh); 2287 journal_mark_dirty(&th, inode->i_sb, bh);
2288 bytes_copied += copy_size; 2288 bytes_copied += copy_size;
2289 set_block_dev_mapped(bh_result, 0, inode); 2289 set_block_dev_mapped(bh_result, 0, inode);
2290 2290
2291 /* are there still bytes left? */ 2291 /* are there still bytes left? */
2292 if (bytes_copied < bh_result->b_size && 2292 if (bytes_copied < bh_result->b_size &&
2293 (byte_offset + bytes_copied) < inode->i_size) { 2293 (byte_offset + bytes_copied) < inode->i_size) {
2294 set_cpu_key_k_offset(&key, 2294 set_cpu_key_k_offset(&key,
2295 cpu_key_k_offset(&key) + 2295 cpu_key_k_offset(&key) +
2296 copy_size); 2296 copy_size);
2297 goto research; 2297 goto research;
2298 } 2298 }
2299 } else { 2299 } else {
2300 reiserfs_warning(inode->i_sb, 2300 reiserfs_warning(inode->i_sb,
2301 "clm-6003: bad item inode %lu, device %s", 2301 "clm-6003: bad item inode %lu, device %s",
2302 inode->i_ino, reiserfs_bdevname(inode->i_sb)); 2302 inode->i_ino, reiserfs_bdevname(inode->i_sb));
2303 retval = -EIO; 2303 retval = -EIO;
2304 goto out; 2304 goto out;
2305 } 2305 }
2306 retval = 0; 2306 retval = 0;
2307 2307
2308 out: 2308 out:
2309 pathrelse(&path); 2309 pathrelse(&path);
2310 if (trans_running) { 2310 if (trans_running) {
2311 int err = journal_end(&th, inode->i_sb, jbegin_count); 2311 int err = journal_end(&th, inode->i_sb, jbegin_count);
2312 if (err) 2312 if (err)
2313 retval = err; 2313 retval = err;
2314 trans_running = 0; 2314 trans_running = 0;
2315 } 2315 }
2316 reiserfs_write_unlock(inode->i_sb); 2316 reiserfs_write_unlock(inode->i_sb);
2317 2317
2318 /* this is where we fill in holes in the file. */ 2318 /* this is where we fill in holes in the file. */
2319 if (use_get_block) { 2319 if (use_get_block) {
2320 retval = reiserfs_get_block(inode, block, bh_result, 2320 retval = reiserfs_get_block(inode, block, bh_result,
2321 GET_BLOCK_CREATE | GET_BLOCK_NO_IMUX 2321 GET_BLOCK_CREATE | GET_BLOCK_NO_IMUX
2322 | GET_BLOCK_NO_DANGLE); 2322 | GET_BLOCK_NO_DANGLE);
2323 if (!retval) { 2323 if (!retval) {
2324 if (!buffer_mapped(bh_result) 2324 if (!buffer_mapped(bh_result)
2325 || bh_result->b_blocknr == 0) { 2325 || bh_result->b_blocknr == 0) {
2326 /* get_block failed to find a mapped unformatted node. */ 2326 /* get_block failed to find a mapped unformatted node. */
2327 use_get_block = 0; 2327 use_get_block = 0;
2328 goto start_over; 2328 goto start_over;
2329 } 2329 }
2330 } 2330 }
2331 } 2331 }
2332 kunmap(bh_result->b_page); 2332 kunmap(bh_result->b_page);
2333 2333
2334 if (!retval && buffer_mapped(bh_result) && bh_result->b_blocknr == 0) { 2334 if (!retval && buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
2335 /* we've copied data from the page into the direct item, so the 2335 /* we've copied data from the page into the direct item, so the
2336 * buffer in the page is now clean, mark it to reflect that. 2336 * buffer in the page is now clean, mark it to reflect that.
2337 */ 2337 */
2338 lock_buffer(bh_result); 2338 lock_buffer(bh_result);
2339 clear_buffer_dirty(bh_result); 2339 clear_buffer_dirty(bh_result);
2340 unlock_buffer(bh_result); 2340 unlock_buffer(bh_result);
2341 } 2341 }
2342 return retval; 2342 return retval;
2343 } 2343 }
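
The +1 in the byte_offset computation above reflects reiserfs's 1-based item offsets: the key must name the first byte of the block as the tree counts it. A worked example, assuming a 4096-byte block size (s_blocksize_bits = 12):

	/* block 3 of the file:
	 *
	 *   byte_offset = (3 << 12) + 1 = 12289
	 *
	 * search_for_position_by_key() then lands on the indirect or direct
	 * item covering byte 12289, i.e. the item that maps this block.
	 */
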
2344 2344
2345 /* 2345 /*
2346 * mason@suse.com: updated in 2.5.54 to follow the same general io 2346 * mason@suse.com: updated in 2.5.54 to follow the same general io
2347 * start/recovery path as __block_write_full_page, along with special 2347 * start/recovery path as __block_write_full_page, along with special
2348 * code to handle reiserfs tails. 2348 * code to handle reiserfs tails.
2349 */ 2349 */
2350 static int reiserfs_write_full_page(struct page *page, 2350 static int reiserfs_write_full_page(struct page *page,
2351 struct writeback_control *wbc) 2351 struct writeback_control *wbc)
2352 { 2352 {
2353 struct inode *inode = page->mapping->host; 2353 struct inode *inode = page->mapping->host;
2354 unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT; 2354 unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT;
2355 int error = 0; 2355 int error = 0;
2356 unsigned long block; 2356 unsigned long block;
2357 struct buffer_head *head, *bh; 2357 struct buffer_head *head, *bh;
2358 int partial = 0; 2358 int partial = 0;
2359 int nr = 0; 2359 int nr = 0;
2360 int checked = PageChecked(page); 2360 int checked = PageChecked(page);
2361 struct reiserfs_transaction_handle th; 2361 struct reiserfs_transaction_handle th;
2362 struct super_block *s = inode->i_sb; 2362 struct super_block *s = inode->i_sb;
2363 int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize; 2363 int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize;
2364 th.t_trans_id = 0; 2364 th.t_trans_id = 0;
2365 2365
2366 /* no logging allowed when nonblocking or from PF_MEMALLOC */
2367 if (checked && (current->flags & PF_MEMALLOC)) {
2368 redirty_page_for_writepage(wbc, page);
2369 unlock_page(page);
2370 return 0;
2371 }
2372
2366 /* The page dirty bit is cleared before writepage is called, which 2373 /* The page dirty bit is cleared before writepage is called, which
2367 * means we have to tell create_empty_buffers to make dirty buffers 2374 * means we have to tell create_empty_buffers to make dirty buffers
2368 * The page really should be up to date at this point, so tossing 2375 * The page really should be up to date at this point, so tossing
2369 * in the BH_Uptodate is just a sanity check. 2376 * in the BH_Uptodate is just a sanity check.
2370 */ 2377 */
2371 if (!page_has_buffers(page)) { 2378 if (!page_has_buffers(page)) {
2372 create_empty_buffers(page, s->s_blocksize, 2379 create_empty_buffers(page, s->s_blocksize,
2373 (1 << BH_Dirty) | (1 << BH_Uptodate)); 2380 (1 << BH_Dirty) | (1 << BH_Uptodate));
2374 } 2381 }
2375 head = page_buffers(page); 2382 head = page_buffers(page);
2376 2383
2377 /* last page in the file, zero out any contents past the 2384 /* last page in the file, zero out any contents past the
2378 ** last byte in the file 2385 ** last byte in the file
2379 */ 2386 */
2380 if (page->index >= end_index) { 2387 if (page->index >= end_index) {
2381 char *kaddr; 2388 char *kaddr;
2382 unsigned last_offset; 2389 unsigned last_offset;
2383 2390
2384 last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1); 2391 last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
2385 /* no file contents in this page */ 2392 /* no file contents in this page */
2386 if (page->index >= end_index + 1 || !last_offset) { 2393 if (page->index >= end_index + 1 || !last_offset) {
2387 unlock_page(page); 2394 unlock_page(page);
2388 return 0; 2395 return 0;
2389 } 2396 }
2390 kaddr = kmap_atomic(page, KM_USER0); 2397 kaddr = kmap_atomic(page, KM_USER0);
2391 memset(kaddr + last_offset, 0, PAGE_CACHE_SIZE - last_offset); 2398 memset(kaddr + last_offset, 0, PAGE_CACHE_SIZE - last_offset);
2392 flush_dcache_page(page); 2399 flush_dcache_page(page);
2393 kunmap_atomic(kaddr, KM_USER0); 2400 kunmap_atomic(kaddr, KM_USER0);
2394 } 2401 }
2395 bh = head; 2402 bh = head;
2396 block = page->index << (PAGE_CACHE_SHIFT - s->s_blocksize_bits); 2403 block = page->index << (PAGE_CACHE_SHIFT - s->s_blocksize_bits);
2397 /* first map all the buffers, logging any direct items we find */ 2404 /* first map all the buffers, logging any direct items we find */
2398 do { 2405 do {
2399 if ((checked || buffer_dirty(bh)) && (!buffer_mapped(bh) || 2406 if ((checked || buffer_dirty(bh)) && (!buffer_mapped(bh) ||
2400 (buffer_mapped(bh) 2407 (buffer_mapped(bh)
2401 && bh->b_blocknr == 2408 && bh->b_blocknr ==
2402 0))) { 2409 0))) {
2403 /* not mapped yet, or it points to a direct item, search 2410 /* not mapped yet, or it points to a direct item, search
2404 * the btree for the mapping info, and log any direct 2411 * the btree for the mapping info, and log any direct
2405 * items found 2412 * items found
2406 */ 2413 */
2407 if ((error = map_block_for_writepage(inode, bh, block))) { 2414 if ((error = map_block_for_writepage(inode, bh, block))) {
2408 goto fail; 2415 goto fail;
2409 } 2416 }
2410 } 2417 }
2411 bh = bh->b_this_page; 2418 bh = bh->b_this_page;
2412 block++; 2419 block++;
2413 } while (bh != head); 2420 } while (bh != head);
2414 2421
2415 /* 2422 /*
2416 * we start the transaction after map_block_for_writepage, 2423 * we start the transaction after map_block_for_writepage,
2417 * because it can create holes in the file (an unbounded operation). 2424 * because it can create holes in the file (an unbounded operation).
2418 * starting it here, we can make a reliable estimate for how many 2425 * starting it here, we can make a reliable estimate for how many
2419 * blocks we're going to log 2426 * blocks we're going to log
2420 */ 2427 */
2421 if (checked) { 2428 if (checked) {
2422 ClearPageChecked(page); 2429 ClearPageChecked(page);
2423 reiserfs_write_lock(s); 2430 reiserfs_write_lock(s);
2424 error = journal_begin(&th, s, bh_per_page + 1); 2431 error = journal_begin(&th, s, bh_per_page + 1);
2425 if (error) { 2432 if (error) {
2426 reiserfs_write_unlock(s); 2433 reiserfs_write_unlock(s);
2427 goto fail; 2434 goto fail;
2428 } 2435 }
2429 reiserfs_update_inode_transaction(inode); 2436 reiserfs_update_inode_transaction(inode);
2430 } 2437 }
2431 /* now go through and lock any dirty buffers on the page */ 2438 /* now go through and lock any dirty buffers on the page */
2432 do { 2439 do {
2433 get_bh(bh); 2440 get_bh(bh);
2434 if (!buffer_mapped(bh)) 2441 if (!buffer_mapped(bh))
2435 continue; 2442 continue;
2436 if (buffer_mapped(bh) && bh->b_blocknr == 0) 2443 if (buffer_mapped(bh) && bh->b_blocknr == 0)
2437 continue; 2444 continue;
2438 2445
2439 if (checked) { 2446 if (checked) {
2440 reiserfs_prepare_for_journal(s, bh, 1); 2447 reiserfs_prepare_for_journal(s, bh, 1);
2441 journal_mark_dirty(&th, s, bh); 2448 journal_mark_dirty(&th, s, bh);
2442 continue; 2449 continue;
2443 } 2450 }
2444 /* from this point on, we know the buffer is mapped to a 2451 /* from this point on, we know the buffer is mapped to a
2445 * real block and not a direct item 2452 * real block and not a direct item
2446 */ 2453 */
2447 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { 2454 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
2448 lock_buffer(bh); 2455 lock_buffer(bh);
2449 } else { 2456 } else {
2450 if (test_set_buffer_locked(bh)) { 2457 if (test_set_buffer_locked(bh)) {
2451 redirty_page_for_writepage(wbc, page); 2458 redirty_page_for_writepage(wbc, page);
2452 continue; 2459 continue;
2453 } 2460 }
2454 } 2461 }
2455 if (test_clear_buffer_dirty(bh)) { 2462 if (test_clear_buffer_dirty(bh)) {
2456 mark_buffer_async_write(bh); 2463 mark_buffer_async_write(bh);
2457 } else { 2464 } else {
2458 unlock_buffer(bh); 2465 unlock_buffer(bh);
2459 } 2466 }
2460 } while ((bh = bh->b_this_page) != head); 2467 } while ((bh = bh->b_this_page) != head);
2461 2468
2462 if (checked) { 2469 if (checked) {
2463 error = journal_end(&th, s, bh_per_page + 1); 2470 error = journal_end(&th, s, bh_per_page + 1);
2464 reiserfs_write_unlock(s); 2471 reiserfs_write_unlock(s);
2465 if (error) 2472 if (error)
2466 goto fail; 2473 goto fail;
2467 } 2474 }
2468 BUG_ON(PageWriteback(page)); 2475 BUG_ON(PageWriteback(page));
2469 set_page_writeback(page); 2476 set_page_writeback(page);
2470 unlock_page(page); 2477 unlock_page(page);
2471 2478
2472 /* 2479 /*
2473 * since any buffer might be the only dirty buffer on the page, 2480 * since any buffer might be the only dirty buffer on the page,
2474 * the first submit_bh can bring the page out of writeback. 2481 * the first submit_bh can bring the page out of writeback.
2475 * be careful with the buffers. 2482 * be careful with the buffers.
2476 */ 2483 */
2477 do { 2484 do {
2478 struct buffer_head *next = bh->b_this_page; 2485 struct buffer_head *next = bh->b_this_page;
2479 if (buffer_async_write(bh)) { 2486 if (buffer_async_write(bh)) {
2480 submit_bh(WRITE, bh); 2487 submit_bh(WRITE, bh);
2481 nr++; 2488 nr++;
2482 } 2489 }
2483 put_bh(bh); 2490 put_bh(bh);
2484 bh = next; 2491 bh = next;
2485 } while (bh != head); 2492 } while (bh != head);
2486 2493
2487 error = 0; 2494 error = 0;
2488 done: 2495 done:
2489 if (nr == 0) { 2496 if (nr == 0) {
2490 /* 2497 /*
2491 * if this page only had a direct item, it is very possible for 2498 * if this page only had a direct item, it is very possible for
2492 * no io to be required without there being an error. Or, 2499 * no io to be required without there being an error. Or,
2493 * someone else could have locked them and sent them down the 2500 * someone else could have locked them and sent them down the
2494 * pipe without locking the page 2501 * pipe without locking the page
2495 */ 2502 */
2496 bh = head; 2503 bh = head;
2497 do { 2504 do {
2498 if (!buffer_uptodate(bh)) { 2505 if (!buffer_uptodate(bh)) {
2499 partial = 1; 2506 partial = 1;
2500 break; 2507 break;
2501 } 2508 }
2502 bh = bh->b_this_page; 2509 bh = bh->b_this_page;
2503 } while (bh != head); 2510 } while (bh != head);
2504 if (!partial) 2511 if (!partial)
2505 SetPageUptodate(page); 2512 SetPageUptodate(page);
2506 end_page_writeback(page); 2513 end_page_writeback(page);
2507 } 2514 }
2508 return error; 2515 return error;
2509 2516
2510 fail: 2517 fail:
2511 /* catches various errors, we need to make sure any valid dirty blocks 2518 /* catches various errors, we need to make sure any valid dirty blocks
2512 * get to the media. The page is currently locked and not marked for 2519 * get to the media. The page is currently locked and not marked for
2513 * writeback 2520 * writeback
2514 */ 2521 */
2515 ClearPageUptodate(page); 2522 ClearPageUptodate(page);
2516 bh = head; 2523 bh = head;
2517 do { 2524 do {
2518 get_bh(bh); 2525 get_bh(bh);
2519 if (buffer_mapped(bh) && buffer_dirty(bh) && bh->b_blocknr) { 2526 if (buffer_mapped(bh) && buffer_dirty(bh) && bh->b_blocknr) {
2520 lock_buffer(bh); 2527 lock_buffer(bh);
2521 mark_buffer_async_write(bh); 2528 mark_buffer_async_write(bh);
2522 } else { 2529 } else {
2523 /* 2530 /*
2524 * clear any dirty bits that might have come from getting 2531 * clear any dirty bits that might have come from getting
2525 * attached to a dirty page 2532 * attached to a dirty page
2526 */ 2533 */
2527 clear_buffer_dirty(bh); 2534 clear_buffer_dirty(bh);
2528 } 2535 }
2529 bh = bh->b_this_page; 2536 bh = bh->b_this_page;
2530 } while (bh != head); 2537 } while (bh != head);
2531 SetPageError(page); 2538 SetPageError(page);
2532 BUG_ON(PageWriteback(page)); 2539 BUG_ON(PageWriteback(page));
2533 set_page_writeback(page); 2540 set_page_writeback(page);
2534 unlock_page(page); 2541 unlock_page(page);
2535 do { 2542 do {
2536 struct buffer_head *next = bh->b_this_page; 2543 struct buffer_head *next = bh->b_this_page;
2537 if (buffer_async_write(bh)) { 2544 if (buffer_async_write(bh)) {
2538 clear_buffer_dirty(bh); 2545 clear_buffer_dirty(bh);
2539 submit_bh(WRITE, bh); 2546 submit_bh(WRITE, bh);
2540 nr++; 2547 nr++;
2541 } 2548 }
2542 put_bh(bh); 2549 put_bh(bh);
2543 bh = next; 2550 bh = next;
2544 } while (bh != head); 2551 } while (bh != head);
2545 goto done; 2552 goto done;
2546 } 2553 }
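
The journal_begin(&th, s, bh_per_page + 1) call above sizes the transaction from the worst case for a checked (data=journal) page: every buffer on the page may need logging. A worked example, assuming 1024-byte blocks under a 4096-byte page:

	/* PAGE_CACHE_SIZE = 4096, s->s_blocksize = 1024
	 *
	 * bh_per_page = 4096 / 1024 = 4    buffers that might be logged
	 * journal_begin(&th, s, 4 + 1)     reserve those plus one block of
	 *                                  headroom
	 *
	 * The estimate is reliable only because map_block_for_writepage()
	 * has already run, so no hole-filling can happen inside this
	 * transaction (see the comment above the journal_begin call).
	 */
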
2547 2554
2548 static int reiserfs_readpage(struct file *f, struct page *page) 2555 static int reiserfs_readpage(struct file *f, struct page *page)
2549 { 2556 {
2550 return block_read_full_page(page, reiserfs_get_block); 2557 return block_read_full_page(page, reiserfs_get_block);
2551 } 2558 }
2552 2559
2553 static int reiserfs_writepage(struct page *page, struct writeback_control *wbc) 2560 static int reiserfs_writepage(struct page *page, struct writeback_control *wbc)
2554 { 2561 {
2555 struct inode *inode = page->mapping->host; 2562 struct inode *inode = page->mapping->host;
2556 reiserfs_wait_on_write_block(inode->i_sb); 2563 reiserfs_wait_on_write_block(inode->i_sb);
2557 return reiserfs_write_full_page(page, wbc); 2564 return reiserfs_write_full_page(page, wbc);
2558 } 2565 }
2559 2566
2560 static int reiserfs_prepare_write(struct file *f, struct page *page, 2567 static int reiserfs_prepare_write(struct file *f, struct page *page,
2561 unsigned from, unsigned to) 2568 unsigned from, unsigned to)
2562 { 2569 {
2563 struct inode *inode = page->mapping->host; 2570 struct inode *inode = page->mapping->host;
2564 int ret; 2571 int ret;
2565 int old_ref = 0; 2572 int old_ref = 0;
2566 2573
2567 reiserfs_wait_on_write_block(inode->i_sb); 2574 reiserfs_wait_on_write_block(inode->i_sb);
2568 fix_tail_page_for_writing(page); 2575 fix_tail_page_for_writing(page);
2569 if (reiserfs_transaction_running(inode->i_sb)) { 2576 if (reiserfs_transaction_running(inode->i_sb)) {
2570 struct reiserfs_transaction_handle *th; 2577 struct reiserfs_transaction_handle *th;
2571 th = (struct reiserfs_transaction_handle *)current-> 2578 th = (struct reiserfs_transaction_handle *)current->
2572 journal_info; 2579 journal_info;
2573 BUG_ON(!th->t_refcount); 2580 BUG_ON(!th->t_refcount);
2574 BUG_ON(!th->t_trans_id); 2581 BUG_ON(!th->t_trans_id);
2575 old_ref = th->t_refcount; 2582 old_ref = th->t_refcount;
2576 th->t_refcount++; 2583 th->t_refcount++;
2577 } 2584 }
2578 2585
2579 ret = block_prepare_write(page, from, to, reiserfs_get_block); 2586 ret = block_prepare_write(page, from, to, reiserfs_get_block);
2580 if (ret && reiserfs_transaction_running(inode->i_sb)) { 2587 if (ret && reiserfs_transaction_running(inode->i_sb)) {
2581 struct reiserfs_transaction_handle *th = current->journal_info; 2588 struct reiserfs_transaction_handle *th = current->journal_info;
2582 /* this gets a little ugly. If reiserfs_get_block returned an 2589 /* this gets a little ugly. If reiserfs_get_block returned an
2583 * error and left a transaction running, we've got to close it, 2590 * error and left a transaction running, we've got to close it,
2584 * and we've got to free the handle if it was a persistent transaction. 2591 * and we've got to free the handle if it was a persistent transaction.
2585 * 2592 *
2586 * But, if we had nested into an existing transaction, we need 2593 * But, if we had nested into an existing transaction, we need
2587 * to just drop the ref count on the handle. 2594 * to just drop the ref count on the handle.
2588 * 2595 *
2589 * If old_ref == 0, the transaction is from reiserfs_get_block, 2596 * If old_ref == 0, the transaction is from reiserfs_get_block,
2590 * and it was a persistent trans. Otherwise, it was nested above. 2597 * and it was a persistent trans. Otherwise, it was nested above.
2591 */ 2598 */
2592 if (th->t_refcount > old_ref) { 2599 if (th->t_refcount > old_ref) {
2593 if (old_ref) 2600 if (old_ref)
2594 th->t_refcount--; 2601 th->t_refcount--;
2595 else { 2602 else {
2596 int err; 2603 int err;
2597 reiserfs_write_lock(inode->i_sb); 2604 reiserfs_write_lock(inode->i_sb);
2598 err = reiserfs_end_persistent_transaction(th); 2605 err = reiserfs_end_persistent_transaction(th);
2599 reiserfs_write_unlock(inode->i_sb); 2606 reiserfs_write_unlock(inode->i_sb);
2600 if (err) 2607 if (err)
2601 ret = err; 2608 ret = err;
2602 } 2609 }
2603 } 2610 }
2604 } 2611 }
2605 return ret; 2612 return ret;
2606 2613
2607 } 2614 }
2608 2615
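
The old_ref bookkeeping in reiserfs_prepare_write() encodes who owns the running transaction when block_prepare_write() fails. A condensed restatement of that cleanup decision (same logic as above, reshaped for readability, not a drop-in replacement):

	/* on error with a transaction running:
	 *
	 *   th->t_refcount > old_ref   reiserfs_get_block() left a handle
	 *                              that we must dispose of:
	 *
	 *     old_ref != 0             we had nested into a pre-existing
	 *                              transaction; just drop our reference
	 *     old_ref == 0             the handle is a persistent transaction
	 *                              started by reiserfs_get_block(); end
	 *                              it under the write lock and propagate
	 *                              any error from the end
	 */
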
2609 static sector_t reiserfs_aop_bmap(struct address_space *as, sector_t block) 2616 static sector_t reiserfs_aop_bmap(struct address_space *as, sector_t block)
2610 { 2617 {
2611 return generic_block_bmap(as, block, reiserfs_bmap); 2618 return generic_block_bmap(as, block, reiserfs_bmap);
2612 } 2619 }
2613 2620
2614 static int reiserfs_commit_write(struct file *f, struct page *page, 2621 static int reiserfs_commit_write(struct file *f, struct page *page,
2615 unsigned from, unsigned to) 2622 unsigned from, unsigned to)
2616 { 2623 {
2617 struct inode *inode = page->mapping->host; 2624 struct inode *inode = page->mapping->host;
2618 loff_t pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + to; 2625 loff_t pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + to;
2619 int ret = 0; 2626 int ret = 0;
2620 int update_sd = 0; 2627 int update_sd = 0;
2621 struct reiserfs_transaction_handle *th = NULL; 2628 struct reiserfs_transaction_handle *th = NULL;
2622 2629
2623 reiserfs_wait_on_write_block(inode->i_sb); 2630 reiserfs_wait_on_write_block(inode->i_sb);
2624 if (reiserfs_transaction_running(inode->i_sb)) { 2631 if (reiserfs_transaction_running(inode->i_sb)) {
2625 th = current->journal_info; 2632 th = current->journal_info;
2626 } 2633 }
2627 reiserfs_commit_page(inode, page, from, to); 2634 reiserfs_commit_page(inode, page, from, to);
2628 2635
2629 /* generic_commit_write does this for us, but does not update the 2636 /* generic_commit_write does this for us, but does not update the
2630 ** transaction tracking stuff when the size changes. So, we have 2637 ** transaction tracking stuff when the size changes. So, we have
2631 ** to do the i_size updates here. 2638 ** to do the i_size updates here.
2632 */ 2639 */
2633 if (pos > inode->i_size) { 2640 if (pos > inode->i_size) {
2634 struct reiserfs_transaction_handle myth; 2641 struct reiserfs_transaction_handle myth;
2635 reiserfs_write_lock(inode->i_sb); 2642 reiserfs_write_lock(inode->i_sb);
2636 /* If the file has grown beyond the boundary where it 2643 /* If the file has grown beyond the boundary where it
2637 can have a tail, unmark it as needing tail 2644 can have a tail, unmark it as needing tail
2638 packing */ 2645 packing */
2639 if ((have_large_tails(inode->i_sb) 2646 if ((have_large_tails(inode->i_sb)
2640 && inode->i_size > i_block_size(inode) * 4) 2647 && inode->i_size > i_block_size(inode) * 4)
2641 || (have_small_tails(inode->i_sb) 2648 || (have_small_tails(inode->i_sb)
2642 && inode->i_size > i_block_size(inode))) 2649 && inode->i_size > i_block_size(inode)))
2643 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; 2650 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
2644 2651
2645 ret = journal_begin(&myth, inode->i_sb, 1); 2652 ret = journal_begin(&myth, inode->i_sb, 1);
2646 if (ret) { 2653 if (ret) {
2647 reiserfs_write_unlock(inode->i_sb); 2654 reiserfs_write_unlock(inode->i_sb);
2648 goto journal_error; 2655 goto journal_error;
2649 } 2656 }
2650 reiserfs_update_inode_transaction(inode); 2657 reiserfs_update_inode_transaction(inode);
2651 inode->i_size = pos; 2658 inode->i_size = pos;
2652 /* 2659 /*
2653 * this will just nest into our transaction. It's important 2660 * this will just nest into our transaction. It's important
2654 * to use mark_inode_dirty so the inode gets pushed around on the 2661 * to use mark_inode_dirty so the inode gets pushed around on the
2655 * dirty lists, and so that O_SYNC works as expected 2662 * dirty lists, and so that O_SYNC works as expected
2656 */ 2663 */
2657 mark_inode_dirty(inode); 2664 mark_inode_dirty(inode);
2658 reiserfs_update_sd(&myth, inode); 2665 reiserfs_update_sd(&myth, inode);
2659 update_sd = 1; 2666 update_sd = 1;
2660 ret = journal_end(&myth, inode->i_sb, 1); 2667 ret = journal_end(&myth, inode->i_sb, 1);
2661 reiserfs_write_unlock(inode->i_sb); 2668 reiserfs_write_unlock(inode->i_sb);
2662 if (ret) 2669 if (ret)
2663 goto journal_error; 2670 goto journal_error;
2664 } 2671 }
2665 if (th) { 2672 if (th) {
2666 reiserfs_write_lock(inode->i_sb); 2673 reiserfs_write_lock(inode->i_sb);
2667 if (!update_sd) 2674 if (!update_sd)
2668 mark_inode_dirty(inode); 2675 mark_inode_dirty(inode);
2669 ret = reiserfs_end_persistent_transaction(th); 2676 ret = reiserfs_end_persistent_transaction(th);
2670 reiserfs_write_unlock(inode->i_sb); 2677 reiserfs_write_unlock(inode->i_sb);
2671 if (ret) 2678 if (ret)
2672 goto out; 2679 goto out;
2673 } 2680 }
2674 2681
2675 out: 2682 out:
2676 return ret; 2683 return ret;
2677 2684
2678 journal_error: 2685 journal_error:
2679 if (th) { 2686 if (th) {
2680 reiserfs_write_lock(inode->i_sb); 2687 reiserfs_write_lock(inode->i_sb);
2681 if (!update_sd) 2688 if (!update_sd)
2682 reiserfs_update_sd(th, inode); 2689 reiserfs_update_sd(th, inode);
2683 ret = reiserfs_end_persistent_transaction(th); 2690 ret = reiserfs_end_persistent_transaction(th);
2684 reiserfs_write_unlock(inode->i_sb); 2691 reiserfs_write_unlock(inode->i_sb);
2685 } 2692 }
2686 2693
2687 return ret; 2694 return ret;
2688 } 2695 }
2689 2696
2690 void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode) 2697 void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode)
2691 { 2698 {
2692 if (reiserfs_attrs(inode->i_sb)) { 2699 if (reiserfs_attrs(inode->i_sb)) {
2693 if (sd_attrs & REISERFS_SYNC_FL) 2700 if (sd_attrs & REISERFS_SYNC_FL)
2694 inode->i_flags |= S_SYNC; 2701 inode->i_flags |= S_SYNC;
2695 else 2702 else
2696 inode->i_flags &= ~S_SYNC; 2703 inode->i_flags &= ~S_SYNC;
2697 if (sd_attrs & REISERFS_IMMUTABLE_FL) 2704 if (sd_attrs & REISERFS_IMMUTABLE_FL)
2698 inode->i_flags |= S_IMMUTABLE; 2705 inode->i_flags |= S_IMMUTABLE;
2699 else 2706 else
2700 inode->i_flags &= ~S_IMMUTABLE; 2707 inode->i_flags &= ~S_IMMUTABLE;
2701 if (sd_attrs & REISERFS_APPEND_FL) 2708 if (sd_attrs & REISERFS_APPEND_FL)
2702 inode->i_flags |= S_APPEND; 2709 inode->i_flags |= S_APPEND;
2703 else 2710 else
2704 inode->i_flags &= ~S_APPEND; 2711 inode->i_flags &= ~S_APPEND;
2705 if (sd_attrs & REISERFS_NOATIME_FL) 2712 if (sd_attrs & REISERFS_NOATIME_FL)
2706 inode->i_flags |= S_NOATIME; 2713 inode->i_flags |= S_NOATIME;
2707 else 2714 else
2708 inode->i_flags &= ~S_NOATIME; 2715 inode->i_flags &= ~S_NOATIME;
2709 if (sd_attrs & REISERFS_NOTAIL_FL) 2716 if (sd_attrs & REISERFS_NOTAIL_FL)
2710 REISERFS_I(inode)->i_flags |= i_nopack_mask; 2717 REISERFS_I(inode)->i_flags |= i_nopack_mask;
2711 else 2718 else
2712 REISERFS_I(inode)->i_flags &= ~i_nopack_mask; 2719 REISERFS_I(inode)->i_flags &= ~i_nopack_mask;
2713 } 2720 }
2714 } 2721 }
2715 2722
2716 void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs) 2723 void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs)
2717 { 2724 {
2718 if (reiserfs_attrs(inode->i_sb)) { 2725 if (reiserfs_attrs(inode->i_sb)) {
2719 if (inode->i_flags & S_IMMUTABLE) 2726 if (inode->i_flags & S_IMMUTABLE)
2720 *sd_attrs |= REISERFS_IMMUTABLE_FL; 2727 *sd_attrs |= REISERFS_IMMUTABLE_FL;
2721 else 2728 else
2722 *sd_attrs &= ~REISERFS_IMMUTABLE_FL; 2729 *sd_attrs &= ~REISERFS_IMMUTABLE_FL;
2723 if (inode->i_flags & S_SYNC) 2730 if (inode->i_flags & S_SYNC)
2724 *sd_attrs |= REISERFS_SYNC_FL; 2731 *sd_attrs |= REISERFS_SYNC_FL;
2725 else 2732 else
2726 *sd_attrs &= ~REISERFS_SYNC_FL; 2733 *sd_attrs &= ~REISERFS_SYNC_FL;
2727 if (inode->i_flags & S_NOATIME) 2734 if (inode->i_flags & S_NOATIME)
2728 *sd_attrs |= REISERFS_NOATIME_FL; 2735 *sd_attrs |= REISERFS_NOATIME_FL;
2729 else 2736 else
2730 *sd_attrs &= ~REISERFS_NOATIME_FL; 2737 *sd_attrs &= ~REISERFS_NOATIME_FL;
2731 if (REISERFS_I(inode)->i_flags & i_nopack_mask) 2738 if (REISERFS_I(inode)->i_flags & i_nopack_mask)
2732 *sd_attrs |= REISERFS_NOTAIL_FL; 2739 *sd_attrs |= REISERFS_NOTAIL_FL;
2733 else 2740 else
2734 *sd_attrs &= ~REISERFS_NOTAIL_FL; 2741 *sd_attrs &= ~REISERFS_NOTAIL_FL;
2735 } 2742 }
2736 } 2743 }
2737 2744
2738 /* decide if this buffer needs to stay around for data logging or ordered 2745 /* decide if this buffer needs to stay around for data logging or ordered
2739 ** write purposes 2746 ** write purposes
2740 */ 2747 */
2741 static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh) 2748 static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh)
2742 { 2749 {
2743 int ret = 1; 2750 int ret = 1;
2744 struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb); 2751 struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb);
2745 2752
2746 lock_buffer(bh); 2753 lock_buffer(bh);
2747 spin_lock(&j->j_dirty_buffers_lock); 2754 spin_lock(&j->j_dirty_buffers_lock);
2748 if (!buffer_mapped(bh)) { 2755 if (!buffer_mapped(bh)) {
2749 goto free_jh; 2756 goto free_jh;
2750 } 2757 }
2751 /* the page is locked, and the only places that log a data buffer 2758 /* the page is locked, and the only places that log a data buffer
2752 * also lock the page. 2759 * also lock the page.
2753 */ 2760 */
2754 if (reiserfs_file_data_log(inode)) { 2761 if (reiserfs_file_data_log(inode)) {
2755 /* 2762 /*
2756 * very conservative, leave the buffer pinned if 2763 * very conservative, leave the buffer pinned if
2757 * anyone might need it. 2764 * anyone might need it.
2758 */ 2765 */
2759 if (buffer_journaled(bh) || buffer_journal_dirty(bh)) { 2766 if (buffer_journaled(bh) || buffer_journal_dirty(bh)) {
2760 ret = 0; 2767 ret = 0;
2761 } 2768 }
2762 } else if (buffer_dirty(bh)) { 2769 } else if (buffer_dirty(bh)) {
2763 struct reiserfs_journal_list *jl; 2770 struct reiserfs_journal_list *jl;
2764 struct reiserfs_jh *jh = bh->b_private; 2771 struct reiserfs_jh *jh = bh->b_private;
2765 2772
2766 /* why is this safe? 2773 /* why is this safe?
2767 * reiserfs_setattr updates i_size in the on disk 2774 * reiserfs_setattr updates i_size in the on disk
2768 * stat data before allowing vmtruncate to be called. 2775 * stat data before allowing vmtruncate to be called.
2769 * 2776 *
2770 * If the buffer was put onto the ordered list for this 2777 * If the buffer was put onto the ordered list for this
2771 * transaction, we know for sure either this transaction 2778 * transaction, we know for sure either this transaction
2772 * or an older one already has updated i_size on disk, 2779 * or an older one already has updated i_size on disk,
2773 * and this ordered data won't be referenced in the file 2780 * and this ordered data won't be referenced in the file
2774 * if we crash. 2781 * if we crash.
2775 * 2782 *
2776 * if the buffer was put onto the ordered list for an older 2783 * if the buffer was put onto the ordered list for an older
2777 * transaction, we need to leave it around 2784 * transaction, we need to leave it around
2778 */ 2785 */
2779 if (jh && (jl = jh->jl) 2786 if (jh && (jl = jh->jl)
2780 && jl != SB_JOURNAL(inode->i_sb)->j_current_jl) 2787 && jl != SB_JOURNAL(inode->i_sb)->j_current_jl)
2781 ret = 0; 2788 ret = 0;
2782 } 2789 }
2783 free_jh: 2790 free_jh:
2784 if (ret && bh->b_private) { 2791 if (ret && bh->b_private) {
2785 reiserfs_free_jh(bh); 2792 reiserfs_free_jh(bh);
2786 } 2793 }
2787 spin_unlock(&j->j_dirty_buffers_lock); 2794 spin_unlock(&j->j_dirty_buffers_lock);
2788 unlock_buffer(bh); 2795 unlock_buffer(bh);
2789 return ret; 2796 return ret;
2790 } 2797 }
2791 2798
2792 /* clm -- taken from fs/buffer.c:block_invalidate_page */ 2799 /* clm -- taken from fs/buffer.c:block_invalidate_page */
2793 static int reiserfs_invalidatepage(struct page *page, unsigned long offset) 2800 static int reiserfs_invalidatepage(struct page *page, unsigned long offset)
2794 { 2801 {
2795 struct buffer_head *head, *bh, *next; 2802 struct buffer_head *head, *bh, *next;
2796 struct inode *inode = page->mapping->host; 2803 struct inode *inode = page->mapping->host;
2797 unsigned int curr_off = 0; 2804 unsigned int curr_off = 0;
2798 int ret = 1; 2805 int ret = 1;
2799 2806
2800 BUG_ON(!PageLocked(page)); 2807 BUG_ON(!PageLocked(page));
2801 2808
2802 if (offset == 0) 2809 if (offset == 0)
2803 ClearPageChecked(page); 2810 ClearPageChecked(page);
2804 2811
2805 if (!page_has_buffers(page)) 2812 if (!page_has_buffers(page))
2806 goto out; 2813 goto out;
2807 2814
2808 head = page_buffers(page); 2815 head = page_buffers(page);
2809 bh = head; 2816 bh = head;
2810 do { 2817 do {
2811 unsigned int next_off = curr_off + bh->b_size; 2818 unsigned int next_off = curr_off + bh->b_size;
2812 next = bh->b_this_page; 2819 next = bh->b_this_page;
2813 2820
2814 /* 2821 /*
2815 * is this block fully invalidated? 2822 * is this block fully invalidated?
2816 */ 2823 */
2817 if (offset <= curr_off) { 2824 if (offset <= curr_off) {
2818 if (invalidatepage_can_drop(inode, bh)) 2825 if (invalidatepage_can_drop(inode, bh))
2819 reiserfs_unmap_buffer(bh); 2826 reiserfs_unmap_buffer(bh);
2820 else 2827 else
2821 ret = 0; 2828 ret = 0;
2822 } 2829 }
2823 curr_off = next_off; 2830 curr_off = next_off;
2824 bh = next; 2831 bh = next;
2825 } while (bh != head); 2832 } while (bh != head);
2826 2833
2827 /* 2834 /*
2828 * We release buffers only if the entire page is being invalidated. 2835 * We release buffers only if the entire page is being invalidated.
2829 * The get_block cached value has been unconditionally invalidated, 2836 * The get_block cached value has been unconditionally invalidated,
2830 * so real IO is not possible anymore. 2837 * so real IO is not possible anymore.
2831 */ 2838 */
2832 if (!offset && ret) 2839 if (!offset && ret)
2833 ret = try_to_release_page(page, 0); 2840 ret = try_to_release_page(page, 0);
2834 out: 2841 out:
2835 return ret; 2842 return ret;
2836 } 2843 }
2837 2844
2838 static int reiserfs_set_page_dirty(struct page *page) 2845 static int reiserfs_set_page_dirty(struct page *page)
2839 { 2846 {
2840 struct inode *inode = page->mapping->host; 2847 struct inode *inode = page->mapping->host;
2841 if (reiserfs_file_data_log(inode)) { 2848 if (reiserfs_file_data_log(inode)) {
2842 SetPageChecked(page); 2849 SetPageChecked(page);
2843 return __set_page_dirty_nobuffers(page); 2850 return __set_page_dirty_nobuffers(page);
2844 } 2851 }
2845 return __set_page_dirty_buffers(page); 2852 return __set_page_dirty_buffers(page);
2846 } 2853 }
2847 2854
2848 /* 2855 /*
2849 * Returns 1 if the page's buffers were dropped. The page is locked. 2856 * Returns 1 if the page's buffers were dropped. The page is locked.
2850 * 2857 *
2851 * Takes j_dirty_buffers_lock to protect the b_assoc_buffers list_heads 2858 * Takes j_dirty_buffers_lock to protect the b_assoc_buffers list_heads
2852 * in the buffers at page_buffers(page). 2859 * in the buffers at page_buffers(page).
2853 * 2860 *
2854 * even in -o notail mode, we can't be sure an old mount without -o notail 2861 * even in -o notail mode, we can't be sure an old mount without -o notail
2855 * didn't create files with tails. 2862 * didn't create files with tails.
2856 */ 2863 */
2857 static int reiserfs_releasepage(struct page *page, gfp_t unused_gfp_flags) 2864 static int reiserfs_releasepage(struct page *page, gfp_t unused_gfp_flags)
2858 { 2865 {
2859 struct inode *inode = page->mapping->host; 2866 struct inode *inode = page->mapping->host;
2860 struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb); 2867 struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb);
2861 struct buffer_head *head; 2868 struct buffer_head *head;
2862 struct buffer_head *bh; 2869 struct buffer_head *bh;
2863 int ret = 1; 2870 int ret = 1;
2864 2871
2865 WARN_ON(PageChecked(page)); 2872 WARN_ON(PageChecked(page));
2866 spin_lock(&j->j_dirty_buffers_lock); 2873 spin_lock(&j->j_dirty_buffers_lock);
2867 head = page_buffers(page); 2874 head = page_buffers(page);
2868 bh = head; 2875 bh = head;
2869 do { 2876 do {
2870 if (bh->b_private) { 2877 if (bh->b_private) {
2871 if (!buffer_dirty(bh) && !buffer_locked(bh)) { 2878 if (!buffer_dirty(bh) && !buffer_locked(bh)) {
2872 reiserfs_free_jh(bh); 2879 reiserfs_free_jh(bh);
2873 } else { 2880 } else {
2874 ret = 0; 2881 ret = 0;
2875 break; 2882 break;
2876 } 2883 }
2877 } 2884 }
2878 bh = bh->b_this_page; 2885 bh = bh->b_this_page;
2879 } while (bh != head); 2886 } while (bh != head);
2880 if (ret) 2887 if (ret)
2881 ret = try_to_free_buffers(page); 2888 ret = try_to_free_buffers(page);
2882 spin_unlock(&j->j_dirty_buffers_lock); 2889 spin_unlock(&j->j_dirty_buffers_lock);
2883 return ret; 2890 return ret;
2884 } 2891 }
2885 2892
2886 /* We thank Mingming Cao for helping us understand in great detail what 2893 /* We thank Mingming Cao for helping us understand in great detail what
2887 to do in this section of the code. */ 2894 to do in this section of the code. */
2888 static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb, 2895 static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb,
2889 const struct iovec *iov, loff_t offset, 2896 const struct iovec *iov, loff_t offset,
2890 unsigned long nr_segs) 2897 unsigned long nr_segs)
2891 { 2898 {
2892 struct file *file = iocb->ki_filp; 2899 struct file *file = iocb->ki_filp;
2893 struct inode *inode = file->f_mapping->host; 2900 struct inode *inode = file->f_mapping->host;
2894 2901
2895 return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, 2902 return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
2896 offset, nr_segs, 2903 offset, nr_segs,
2897 reiserfs_get_blocks_direct_io, NULL); 2904 reiserfs_get_blocks_direct_io, NULL);
2898 } 2905 }
2899 2906
2900 int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) 2907 int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
2901 { 2908 {
2902 struct inode *inode = dentry->d_inode; 2909 struct inode *inode = dentry->d_inode;
2903 int error; 2910 int error;
2904 unsigned int ia_valid = attr->ia_valid; 2911 unsigned int ia_valid = attr->ia_valid;
2905 reiserfs_write_lock(inode->i_sb); 2912 reiserfs_write_lock(inode->i_sb);
2906 if (attr->ia_valid & ATTR_SIZE) { 2913 if (attr->ia_valid & ATTR_SIZE) {
2907 /* version 2 items will be caught by the s_maxbytes check 2914 /* version 2 items will be caught by the s_maxbytes check
2908 ** done for us in vmtruncate 2915 ** done for us in vmtruncate
2909 */ 2916 */
2910 if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5 && 2917 if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5 &&
2911 attr->ia_size > MAX_NON_LFS) { 2918 attr->ia_size > MAX_NON_LFS) {
2912 error = -EFBIG; 2919 error = -EFBIG;
2913 goto out; 2920 goto out;
2914 } 2921 }
2915 /* fill in hole pointers in the expanding truncate case. */ 2922 /* fill in hole pointers in the expanding truncate case. */
2916 if (attr->ia_size > inode->i_size) { 2923 if (attr->ia_size > inode->i_size) {
2917 error = generic_cont_expand(inode, attr->ia_size); 2924 error = generic_cont_expand(inode, attr->ia_size);
2918 if (REISERFS_I(inode)->i_prealloc_count > 0) { 2925 if (REISERFS_I(inode)->i_prealloc_count > 0) {
2919 int err; 2926 int err;
2920 struct reiserfs_transaction_handle th; 2927 struct reiserfs_transaction_handle th;
2921 /* we're changing at most 2 bitmaps, inode + super */ 2928 /* we're changing at most 2 bitmaps, inode + super */
2922 err = journal_begin(&th, inode->i_sb, 4); 2929 err = journal_begin(&th, inode->i_sb, 4);
2923 if (!err) { 2930 if (!err) {
2924 reiserfs_discard_prealloc(&th, inode); 2931 reiserfs_discard_prealloc(&th, inode);
2925 err = journal_end(&th, inode->i_sb, 4); 2932 err = journal_end(&th, inode->i_sb, 4);
2926 } 2933 }
2927 if (err) 2934 if (err)
2928 error = err; 2935 error = err;
2929 } 2936 }
2930 if (error) 2937 if (error)
2931 goto out; 2938 goto out;
2932 } 2939 }
2933 } 2940 }
2934 2941
2935 if ((((attr->ia_valid & ATTR_UID) && (attr->ia_uid & ~0xffff)) || 2942 if ((((attr->ia_valid & ATTR_UID) && (attr->ia_uid & ~0xffff)) ||
2936 ((attr->ia_valid & ATTR_GID) && (attr->ia_gid & ~0xffff))) && 2943 ((attr->ia_valid & ATTR_GID) && (attr->ia_gid & ~0xffff))) &&
2937 (get_inode_sd_version(inode) == STAT_DATA_V1)) { 2944 (get_inode_sd_version(inode) == STAT_DATA_V1)) {
2938 /* stat data of format v3.5 has 16 bit uid and gid */ 2945 /* stat data of format v3.5 has 16 bit uid and gid */
2939 error = -EINVAL; 2946 error = -EINVAL;
2940 goto out; 2947 goto out;
2941 } 2948 }
2942 2949
2943 error = inode_change_ok(inode, attr); 2950 error = inode_change_ok(inode, attr);
2944 if (!error) { 2951 if (!error) {
2945 if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || 2952 if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
2946 (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { 2953 (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
2947 error = reiserfs_chown_xattrs(inode, attr); 2954 error = reiserfs_chown_xattrs(inode, attr);
2948 2955
2949 if (!error) { 2956 if (!error) {
2950 struct reiserfs_transaction_handle th; 2957 struct reiserfs_transaction_handle th;
2951 int jbegin_count = 2958 int jbegin_count =
2952 2 * 2959 2 *
2953 (REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb) + 2960 (REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb) +
2954 REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb)) + 2961 REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb)) +
2955 2; 2962 2;
2956 2963
2957 /* (user+group)*(old+new) structure - we count quota info and inode write (sb, inode) */ 2964 /* (user+group)*(old+new) structure - we count quota info and inode write (sb, inode) */
2958 error = 2965 error =
2959 journal_begin(&th, inode->i_sb, 2966 journal_begin(&th, inode->i_sb,
2960 jbegin_count); 2967 jbegin_count);
2961 if (error) 2968 if (error)
2962 goto out; 2969 goto out;
2963 error = 2970 error =
2964 DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0; 2971 DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0;
2965 if (error) { 2972 if (error) {
2966 journal_end(&th, inode->i_sb, 2973 journal_end(&th, inode->i_sb,
2967 jbegin_count); 2974 jbegin_count);
2968 goto out; 2975 goto out;
2969 } 2976 }
2970 /* Update corresponding info in inode so that everything is in 2977 /* Update corresponding info in inode so that everything is in
2971 * one transaction */ 2978 * one transaction */
2972 if (attr->ia_valid & ATTR_UID) 2979 if (attr->ia_valid & ATTR_UID)
2973 inode->i_uid = attr->ia_uid; 2980 inode->i_uid = attr->ia_uid;
2974 if (attr->ia_valid & ATTR_GID) 2981 if (attr->ia_valid & ATTR_GID)
2975 inode->i_gid = attr->ia_gid; 2982 inode->i_gid = attr->ia_gid;
2976 mark_inode_dirty(inode); 2983 mark_inode_dirty(inode);
2977 error = 2984 error =
2978 journal_end(&th, inode->i_sb, jbegin_count); 2985 journal_end(&th, inode->i_sb, jbegin_count);
2979 } 2986 }
2980 } 2987 }
2981 if (!error) 2988 if (!error)
2982 error = inode_setattr(inode, attr); 2989 error = inode_setattr(inode, attr);
2983 } 2990 }
2984 2991
2985 if (!error && reiserfs_posixacl(inode->i_sb)) { 2992 if (!error && reiserfs_posixacl(inode->i_sb)) {
2986 if (attr->ia_valid & ATTR_MODE) 2993 if (attr->ia_valid & ATTR_MODE)
2987 error = reiserfs_acl_chmod(inode); 2994 error = reiserfs_acl_chmod(inode);
2988 } 2995 }
2989 2996
2990 out: 2997 out:
2991 reiserfs_write_unlock(inode->i_sb); 2998 reiserfs_write_unlock(inode->i_sb);
2992 return error; 2999 return error;
2993 } 3000 }
2994 3001
2995 struct address_space_operations reiserfs_address_space_operations = { 3002 struct address_space_operations reiserfs_address_space_operations = {
2996 .writepage = reiserfs_writepage, 3003 .writepage = reiserfs_writepage,
2997 .readpage = reiserfs_readpage, 3004 .readpage = reiserfs_readpage,
2998 .readpages = reiserfs_readpages, 3005 .readpages = reiserfs_readpages,
2999 .releasepage = reiserfs_releasepage, 3006 .releasepage = reiserfs_releasepage,
3000 .invalidatepage = reiserfs_invalidatepage, 3007 .invalidatepage = reiserfs_invalidatepage,
3001 .sync_page = block_sync_page, 3008 .sync_page = block_sync_page,
3002 .prepare_write = reiserfs_prepare_write, 3009 .prepare_write = reiserfs_prepare_write,
3003 .commit_write = reiserfs_commit_write, 3010 .commit_write = reiserfs_commit_write,
3004 .bmap = reiserfs_aop_bmap, 3011 .bmap = reiserfs_aop_bmap,
3005 .direct_IO = reiserfs_direct_IO, 3012 .direct_IO = reiserfs_direct_IO,
3006 .set_page_dirty = reiserfs_set_page_dirty, 3013 .set_page_dirty = reiserfs_set_page_dirty,
3007 }; 3014 };
3008 3015
fs/reiserfs/journal.c
1 /* 1 /*
2 ** Write ahead logging implementation copyright Chris Mason 2000 2 ** Write ahead logging implementation copyright Chris Mason 2000
3 ** 3 **
4 ** The background commits make this code very interrelated, and 4 ** The background commits make this code very interrelated, and
5 ** overly complex. I need to rethink things a bit. The major players: 5 ** overly complex. I need to rethink things a bit. The major players:
6 ** 6 **
7 ** journal_begin -- call with the number of blocks you expect to log. 7 ** journal_begin -- call with the number of blocks you expect to log.
8 ** If the current transaction is too 8 ** If the current transaction is too
9 ** old, it will block until the current transaction is 9 ** old, it will block until the current transaction is
10 ** finished, and then start a new one. 10 ** finished, and then start a new one.
11 ** Usually, your transaction will get joined in with 11 ** Usually, your transaction will get joined in with
12 ** previous ones for speed. 12 ** previous ones for speed.
13 ** 13 **
14 ** journal_join -- same as journal_begin, but won't block on the current 14 ** journal_join -- same as journal_begin, but won't block on the current
15 ** transaction regardless of age. Don't ever call 15 ** transaction regardless of age. Don't ever call
16 ** this. Ever. There are only two places it should be 16 ** this. Ever. There are only two places it should be
17 ** called from, and they are both inside this file. 17 ** called from, and they are both inside this file.
18 ** 18 **
19 ** journal_mark_dirty -- adds blocks into this transaction. clears any flags 19 ** journal_mark_dirty -- adds blocks into this transaction. clears any flags
20 ** that might make them get sent to disk 20 ** that might make them get sent to disk
21 ** and then marks them BH_JDirty. Puts the buffer head 21 ** and then marks them BH_JDirty. Puts the buffer head
22 ** into the current transaction hash. 22 ** into the current transaction hash.
23 ** 23 **
24 ** journal_end -- if the current transaction is batchable, it does nothing. 24 ** journal_end -- if the current transaction is batchable, it does nothing.
25 ** Otherwise, it could do an async/synchronous commit, or 25 ** Otherwise, it could do an async/synchronous commit, or
26 ** a full flush of all log and real blocks in the 26 ** a full flush of all log and real blocks in the
27 ** transaction. 27 ** transaction.
28 ** 28 **
29 ** flush_old_commits -- if the current transaction is too old, it is ended and 29 ** flush_old_commits -- if the current transaction is too old, it is ended and
30 ** commit blocks are sent to disk. Forces commit blocks 30 ** commit blocks are sent to disk. Forces commit blocks
31 ** to disk for all backgrounded commits that have been 31 ** to disk for all backgrounded commits that have been
32 ** around too long. 32 ** around too long.
33 ** -- Note, if you call this as an immediate flush 33 ** -- Note, if you call this as an immediate flush
34 ** from within kupdate, it will ignore the immediate flag 34 ** from within kupdate, it will ignore the immediate flag
35 */ 35 */
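Taken together, a typical metadata update drives these entry points in the pattern sketched below (sb, bh and jbegin_count stand in for the caller's superblock, buffer head and block estimate; error handling trimmed):

        struct reiserfs_transaction_handle th;
        int err;

        err = journal_begin(&th, sb, jbegin_count);
        if (err)
                return err;
        /* pin the buffer so it isn't written out from under the log */
        reiserfs_prepare_for_journal(sb, bh, 1);
        /* ... modify bh ... */
        journal_mark_dirty(&th, sb, bh);
        err = journal_end(&th, sb, jbegin_count);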
36 36
37 #include <linux/config.h> 37 #include <linux/config.h>
38 #include <asm/uaccess.h> 38 #include <asm/uaccess.h>
39 #include <asm/system.h> 39 #include <asm/system.h>
40 40
41 #include <linux/time.h> 41 #include <linux/time.h>
42 #include <asm/semaphore.h> 42 #include <asm/semaphore.h>
43 43
44 #include <linux/vmalloc.h> 44 #include <linux/vmalloc.h>
45 #include <linux/reiserfs_fs.h> 45 #include <linux/reiserfs_fs.h>
46 46
47 #include <linux/kernel.h> 47 #include <linux/kernel.h>
48 #include <linux/errno.h> 48 #include <linux/errno.h>
49 #include <linux/fcntl.h> 49 #include <linux/fcntl.h>
50 #include <linux/stat.h> 50 #include <linux/stat.h>
51 #include <linux/string.h> 51 #include <linux/string.h>
52 #include <linux/smp_lock.h> 52 #include <linux/smp_lock.h>
53 #include <linux/buffer_head.h> 53 #include <linux/buffer_head.h>
54 #include <linux/workqueue.h> 54 #include <linux/workqueue.h>
55 #include <linux/writeback.h> 55 #include <linux/writeback.h>
56 #include <linux/blkdev.h> 56 #include <linux/blkdev.h>
57 57
58 /* gets a struct reiserfs_journal_list * from a list head */ 58 /* gets a struct reiserfs_journal_list * from a list head */
59 #define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \ 59 #define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \
60 j_list)) 60 j_list))
61 #define JOURNAL_WORK_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \ 61 #define JOURNAL_WORK_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \
62 j_working_list)) 62 j_working_list))
63 63
64 /* the number of mounted filesystems. This is used to decide when to 64 /* the number of mounted filesystems. This is used to decide when to
65 ** start and kill the commit workqueue 65 ** start and kill the commit workqueue
66 */ 66 */
67 static int reiserfs_mounted_fs_count; 67 static int reiserfs_mounted_fs_count;
68 68
69 static struct workqueue_struct *commit_wq; 69 static struct workqueue_struct *commit_wq;
70 70
71 #define JOURNAL_TRANS_HALF 1018 /* must be correct to keep the desc and commit 71 #define JOURNAL_TRANS_HALF 1018 /* must be correct to keep the desc and commit
72 structs at 4k */ 72 structs at 4k */
73 #define BUFNR 64 /*read ahead */ 73 #define BUFNR 64 /*read ahead */
74 74
75 /* cnode stat bits. Move these into reiserfs_fs.h */ 75 /* cnode stat bits. Move these into reiserfs_fs.h */
76 76
77 #define BLOCK_FREED 2 /* this block was freed, and can't be written. */ 77 #define BLOCK_FREED 2 /* this block was freed, and can't be written. */
78 #define BLOCK_FREED_HOLDER 3 /* this block was freed during this transaction, and can't be written */ 78 #define BLOCK_FREED_HOLDER 3 /* this block was freed during this transaction, and can't be written */
79 79
80 #define BLOCK_NEEDS_FLUSH 4 /* used in flush_journal_list */ 80 #define BLOCK_NEEDS_FLUSH 4 /* used in flush_journal_list */
81 #define BLOCK_DIRTIED 5 81 #define BLOCK_DIRTIED 5
82 82
83 /* journal list state bits */ 83 /* journal list state bits */
84 #define LIST_TOUCHED 1 84 #define LIST_TOUCHED 1
85 #define LIST_DIRTY 2 85 #define LIST_DIRTY 2
86 #define LIST_COMMIT_PENDING 4 /* someone will commit this list */ 86 #define LIST_COMMIT_PENDING 4 /* someone will commit this list */
87 87
88 /* flags for do_journal_end */ 88 /* flags for do_journal_end */
89 #define FLUSH_ALL 1 /* flush commit and real blocks */ 89 #define FLUSH_ALL 1 /* flush commit and real blocks */
90 #define COMMIT_NOW 2 /* end and commit this transaction */ 90 #define COMMIT_NOW 2 /* end and commit this transaction */
91 #define WAIT 4 /* wait for the log blocks to hit the disk */ 91 #define WAIT 4 /* wait for the log blocks to hit the disk */
92 92
93 static int do_journal_end(struct reiserfs_transaction_handle *, 93 static int do_journal_end(struct reiserfs_transaction_handle *,
94 struct super_block *, unsigned long nblocks, 94 struct super_block *, unsigned long nblocks,
95 int flags); 95 int flags);
96 static int flush_journal_list(struct super_block *s, 96 static int flush_journal_list(struct super_block *s,
97 struct reiserfs_journal_list *jl, int flushall); 97 struct reiserfs_journal_list *jl, int flushall);
98 static int flush_commit_list(struct super_block *s, 98 static int flush_commit_list(struct super_block *s,
99 struct reiserfs_journal_list *jl, int flushall); 99 struct reiserfs_journal_list *jl, int flushall);
100 static int can_dirty(struct reiserfs_journal_cnode *cn); 100 static int can_dirty(struct reiserfs_journal_cnode *cn);
101 static int journal_join(struct reiserfs_transaction_handle *th, 101 static int journal_join(struct reiserfs_transaction_handle *th,
102 struct super_block *p_s_sb, unsigned long nblocks); 102 struct super_block *p_s_sb, unsigned long nblocks);
103 static int release_journal_dev(struct super_block *super, 103 static int release_journal_dev(struct super_block *super,
104 struct reiserfs_journal *journal); 104 struct reiserfs_journal *journal);
105 static int dirty_one_transaction(struct super_block *s, 105 static int dirty_one_transaction(struct super_block *s,
106 struct reiserfs_journal_list *jl); 106 struct reiserfs_journal_list *jl);
107 static void flush_async_commits(void *p); 107 static void flush_async_commits(void *p);
108 static void queue_log_writer(struct super_block *s); 108 static void queue_log_writer(struct super_block *s);
109 109
110 /* values for join in do_journal_begin_r */ 110 /* values for join in do_journal_begin_r */
111 enum { 111 enum {
112 JBEGIN_REG = 0, /* regular journal begin */ 112 JBEGIN_REG = 0, /* regular journal begin */
113 JBEGIN_JOIN = 1, /* join the running transaction if at all possible */ 113 JBEGIN_JOIN = 1, /* join the running transaction if at all possible */
114 JBEGIN_ABORT = 2, /* called from cleanup code, ignores aborted flag */ 114 JBEGIN_ABORT = 2, /* called from cleanup code, ignores aborted flag */
115 }; 115 };
116 116
117 static int do_journal_begin_r(struct reiserfs_transaction_handle *th, 117 static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
118 struct super_block *p_s_sb, 118 struct super_block *p_s_sb,
119 unsigned long nblocks, int join); 119 unsigned long nblocks, int join);
120 120
121 static void init_journal_hash(struct super_block *p_s_sb) 121 static void init_journal_hash(struct super_block *p_s_sb)
122 { 122 {
123 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 123 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
124 memset(journal->j_hash_table, 0, 124 memset(journal->j_hash_table, 0,
125 JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *)); 125 JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *));
126 } 126 }
127 127
128 /* 128 /*
129 ** clears BH_Dirty and sticks the buffer on the clean list. Called because I can't allow refile_buffer to 129 ** clears BH_Dirty and sticks the buffer on the clean list. Called because I can't allow refile_buffer to
130 ** make schedule happen after I've freed a block. Look at remove_from_transaction and journal_mark_freed for 130 ** make schedule happen after I've freed a block. Look at remove_from_transaction and journal_mark_freed for
131 ** more details. 131 ** more details.
132 */ 132 */
133 static int reiserfs_clean_and_file_buffer(struct buffer_head *bh) 133 static int reiserfs_clean_and_file_buffer(struct buffer_head *bh)
134 { 134 {
135 if (bh) { 135 if (bh) {
136 clear_buffer_dirty(bh); 136 clear_buffer_dirty(bh);
137 clear_buffer_journal_test(bh); 137 clear_buffer_journal_test(bh);
138 } 138 }
139 return 0; 139 return 0;
140 } 140 }
141 141
142 static void disable_barrier(struct super_block *s) 142 static void disable_barrier(struct super_block *s)
143 { 143 {
144 REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_BARRIER_FLUSH); 144 REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_BARRIER_FLUSH);
145 printk("reiserfs: disabling flush barriers on %s\n", 145 printk("reiserfs: disabling flush barriers on %s\n",
146 reiserfs_bdevname(s)); 146 reiserfs_bdevname(s));
147 } 147 }
148 148
149 static struct reiserfs_bitmap_node *allocate_bitmap_node(struct super_block 149 static struct reiserfs_bitmap_node *allocate_bitmap_node(struct super_block
150 *p_s_sb) 150 *p_s_sb)
151 { 151 {
152 struct reiserfs_bitmap_node *bn; 152 struct reiserfs_bitmap_node *bn;
153 static int id; 153 static int id;
154 154
155 bn = kmalloc(sizeof(struct reiserfs_bitmap_node), GFP_NOFS); 155 bn = kmalloc(sizeof(struct reiserfs_bitmap_node), GFP_NOFS);
156 if (!bn) { 156 if (!bn) {
157 return NULL; 157 return NULL;
158 } 158 }
159 bn->data = kzalloc(p_s_sb->s_blocksize, GFP_NOFS); 159 bn->data = kzalloc(p_s_sb->s_blocksize, GFP_NOFS);
160 if (!bn->data) { 160 if (!bn->data) {
161 kfree(bn); 161 kfree(bn);
162 return NULL; 162 return NULL;
163 } 163 }
164 bn->id = id++; 164 bn->id = id++;
165 INIT_LIST_HEAD(&bn->list); 165 INIT_LIST_HEAD(&bn->list);
166 return bn; 166 return bn;
167 } 167 }
168 168
169 static struct reiserfs_bitmap_node *get_bitmap_node(struct super_block *p_s_sb) 169 static struct reiserfs_bitmap_node *get_bitmap_node(struct super_block *p_s_sb)
170 { 170 {
171 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 171 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
172 struct reiserfs_bitmap_node *bn = NULL; 172 struct reiserfs_bitmap_node *bn = NULL;
173 struct list_head *entry = journal->j_bitmap_nodes.next; 173 struct list_head *entry = journal->j_bitmap_nodes.next;
174 174
175 journal->j_used_bitmap_nodes++; 175 journal->j_used_bitmap_nodes++;
176 repeat: 176 repeat:
177 177
178 if (entry != &journal->j_bitmap_nodes) { 178 if (entry != &journal->j_bitmap_nodes) {
179 bn = list_entry(entry, struct reiserfs_bitmap_node, list); 179 bn = list_entry(entry, struct reiserfs_bitmap_node, list);
180 list_del(entry); 180 list_del(entry);
181 memset(bn->data, 0, p_s_sb->s_blocksize); 181 memset(bn->data, 0, p_s_sb->s_blocksize);
182 journal->j_free_bitmap_nodes--; 182 journal->j_free_bitmap_nodes--;
183 return bn; 183 return bn;
184 } 184 }
185 bn = allocate_bitmap_node(p_s_sb); 185 bn = allocate_bitmap_node(p_s_sb);
186 if (!bn) { 186 if (!bn) {
187 yield(); 187 yield();
188 goto repeat; 188 goto repeat;
189 } 189 }
190 return bn; 190 return bn;
191 } 191 }
192 static inline void free_bitmap_node(struct super_block *p_s_sb, 192 static inline void free_bitmap_node(struct super_block *p_s_sb,
193 struct reiserfs_bitmap_node *bn) 193 struct reiserfs_bitmap_node *bn)
194 { 194 {
195 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 195 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
196 journal->j_used_bitmap_nodes--; 196 journal->j_used_bitmap_nodes--;
197 if (journal->j_free_bitmap_nodes > REISERFS_MAX_BITMAP_NODES) { 197 if (journal->j_free_bitmap_nodes > REISERFS_MAX_BITMAP_NODES) {
198 kfree(bn->data); 198 kfree(bn->data);
199 kfree(bn); 199 kfree(bn);
200 } else { 200 } else {
201 list_add(&bn->list, &journal->j_bitmap_nodes); 201 list_add(&bn->list, &journal->j_bitmap_nodes);
202 journal->j_free_bitmap_nodes++; 202 journal->j_free_bitmap_nodes++;
203 } 203 }
204 } 204 }
205 205
206 static void allocate_bitmap_nodes(struct super_block *p_s_sb) 206 static void allocate_bitmap_nodes(struct super_block *p_s_sb)
207 { 207 {
208 int i; 208 int i;
209 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 209 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
210 struct reiserfs_bitmap_node *bn = NULL; 210 struct reiserfs_bitmap_node *bn = NULL;
211 for (i = 0; i < REISERFS_MIN_BITMAP_NODES; i++) { 211 for (i = 0; i < REISERFS_MIN_BITMAP_NODES; i++) {
212 bn = allocate_bitmap_node(p_s_sb); 212 bn = allocate_bitmap_node(p_s_sb);
213 if (bn) { 213 if (bn) {
214 list_add(&bn->list, &journal->j_bitmap_nodes); 214 list_add(&bn->list, &journal->j_bitmap_nodes);
215 journal->j_free_bitmap_nodes++; 215 journal->j_free_bitmap_nodes++;
216 } else { 216 } else {
217 break; // this is ok, we'll try again when more are needed 217 break; // this is ok, we'll try again when more are needed
218 } 218 }
219 } 219 }
220 } 220 }
221 221
222 static int set_bit_in_list_bitmap(struct super_block *p_s_sb, int block, 222 static int set_bit_in_list_bitmap(struct super_block *p_s_sb, int block,
223 struct reiserfs_list_bitmap *jb) 223 struct reiserfs_list_bitmap *jb)
224 { 224 {
225 int bmap_nr = block / (p_s_sb->s_blocksize << 3); 225 int bmap_nr = block / (p_s_sb->s_blocksize << 3);
226 int bit_nr = block % (p_s_sb->s_blocksize << 3); 226 int bit_nr = block % (p_s_sb->s_blocksize << 3);
227 227
228 if (!jb->bitmaps[bmap_nr]) { 228 if (!jb->bitmaps[bmap_nr]) {
229 jb->bitmaps[bmap_nr] = get_bitmap_node(p_s_sb); 229 jb->bitmaps[bmap_nr] = get_bitmap_node(p_s_sb);
230 } 230 }
231 set_bit(bit_nr, (unsigned long *)jb->bitmaps[bmap_nr]->data); 231 set_bit(bit_nr, (unsigned long *)jb->bitmaps[bmap_nr]->data);
232 return 0; 232 return 0;
233 } 233 }
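As a concrete example of the mapping above: with a 4096-byte block size, each bitmap node covers 4096 << 3 = 32768 bits, so block 40000 falls in bmap_nr 1 at bit_nr 7232.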
234 234
235 static void cleanup_bitmap_list(struct super_block *p_s_sb, 235 static void cleanup_bitmap_list(struct super_block *p_s_sb,
236 struct reiserfs_list_bitmap *jb) 236 struct reiserfs_list_bitmap *jb)
237 { 237 {
238 int i; 238 int i;
239 if (jb->bitmaps == NULL) 239 if (jb->bitmaps == NULL)
240 return; 240 return;
241 241
242 for (i = 0; i < SB_BMAP_NR(p_s_sb); i++) { 242 for (i = 0; i < SB_BMAP_NR(p_s_sb); i++) {
243 if (jb->bitmaps[i]) { 243 if (jb->bitmaps[i]) {
244 free_bitmap_node(p_s_sb, jb->bitmaps[i]); 244 free_bitmap_node(p_s_sb, jb->bitmaps[i]);
245 jb->bitmaps[i] = NULL; 245 jb->bitmaps[i] = NULL;
246 } 246 }
247 } 247 }
248 } 248 }
249 249
250 /* 250 /*
251 ** only call this on FS unmount. 251 ** only call this on FS unmount.
252 */ 252 */
253 static int free_list_bitmaps(struct super_block *p_s_sb, 253 static int free_list_bitmaps(struct super_block *p_s_sb,
254 struct reiserfs_list_bitmap *jb_array) 254 struct reiserfs_list_bitmap *jb_array)
255 { 255 {
256 int i; 256 int i;
257 struct reiserfs_list_bitmap *jb; 257 struct reiserfs_list_bitmap *jb;
258 for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) { 258 for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
259 jb = jb_array + i; 259 jb = jb_array + i;
260 jb->journal_list = NULL; 260 jb->journal_list = NULL;
261 cleanup_bitmap_list(p_s_sb, jb); 261 cleanup_bitmap_list(p_s_sb, jb);
262 vfree(jb->bitmaps); 262 vfree(jb->bitmaps);
263 jb->bitmaps = NULL; 263 jb->bitmaps = NULL;
264 } 264 }
265 return 0; 265 return 0;
266 } 266 }
267 267
268 static int free_bitmap_nodes(struct super_block *p_s_sb) 268 static int free_bitmap_nodes(struct super_block *p_s_sb)
269 { 269 {
270 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 270 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
271 struct list_head *next = journal->j_bitmap_nodes.next; 271 struct list_head *next = journal->j_bitmap_nodes.next;
272 struct reiserfs_bitmap_node *bn; 272 struct reiserfs_bitmap_node *bn;
273 273
274 while (next != &journal->j_bitmap_nodes) { 274 while (next != &journal->j_bitmap_nodes) {
275 bn = list_entry(next, struct reiserfs_bitmap_node, list); 275 bn = list_entry(next, struct reiserfs_bitmap_node, list);
276 list_del(next); 276 list_del(next);
277 kfree(bn->data); 277 kfree(bn->data);
278 kfree(bn); 278 kfree(bn);
279 next = journal->j_bitmap_nodes.next; 279 next = journal->j_bitmap_nodes.next;
280 journal->j_free_bitmap_nodes--; 280 journal->j_free_bitmap_nodes--;
281 } 281 }
282 282
283 return 0; 283 return 0;
284 } 284 }
285 285
286 /* 286 /*
287 ** get memory for JOURNAL_NUM_BITMAPS worth of bitmaps. 287 ** get memory for JOURNAL_NUM_BITMAPS worth of bitmaps.
288 ** jb_array is the array to be filled in. 288 ** jb_array is the array to be filled in.
289 */ 289 */
290 int reiserfs_allocate_list_bitmaps(struct super_block *p_s_sb, 290 int reiserfs_allocate_list_bitmaps(struct super_block *p_s_sb,
291 struct reiserfs_list_bitmap *jb_array, 291 struct reiserfs_list_bitmap *jb_array,
292 int bmap_nr) 292 int bmap_nr)
293 { 293 {
294 int i; 294 int i;
295 int failed = 0; 295 int failed = 0;
296 struct reiserfs_list_bitmap *jb; 296 struct reiserfs_list_bitmap *jb;
297 int mem = bmap_nr * sizeof(struct reiserfs_bitmap_node *); 297 int mem = bmap_nr * sizeof(struct reiserfs_bitmap_node *);
298 298
299 for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) { 299 for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
300 jb = jb_array + i; 300 jb = jb_array + i;
301 jb->journal_list = NULL; 301 jb->journal_list = NULL;
302 jb->bitmaps = vmalloc(mem); 302 jb->bitmaps = vmalloc(mem);
303 if (!jb->bitmaps) { 303 if (!jb->bitmaps) {
304 reiserfs_warning(p_s_sb, 304 reiserfs_warning(p_s_sb,
305 "clm-2000, unable to allocate bitmaps for journal lists"); 305 "clm-2000, unable to allocate bitmaps for journal lists");
306 failed = 1; 306 failed = 1;
307 break; 307 break;
308 } 308 }
309 memset(jb->bitmaps, 0, mem); 309 memset(jb->bitmaps, 0, mem);
310 } 310 }
311 if (failed) { 311 if (failed) {
312 free_list_bitmaps(p_s_sb, jb_array); 312 free_list_bitmaps(p_s_sb, jb_array);
313 return -1; 313 return -1;
314 } 314 }
315 return 0; 315 return 0;
316 } 316 }
317 317
318 /* 318 /*
319 ** find an available list bitmap. If you can't find one, flush a commit list 319 ** find an available list bitmap. If you can't find one, flush a commit list
320 ** and try again 320 ** and try again
321 */ 321 */
322 static struct reiserfs_list_bitmap *get_list_bitmap(struct super_block *p_s_sb, 322 static struct reiserfs_list_bitmap *get_list_bitmap(struct super_block *p_s_sb,
323 struct reiserfs_journal_list 323 struct reiserfs_journal_list
324 *jl) 324 *jl)
325 { 325 {
326 int i, j; 326 int i, j;
327 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 327 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
328 struct reiserfs_list_bitmap *jb = NULL; 328 struct reiserfs_list_bitmap *jb = NULL;
329 329
330 for (j = 0; j < (JOURNAL_NUM_BITMAPS * 3); j++) { 330 for (j = 0; j < (JOURNAL_NUM_BITMAPS * 3); j++) {
331 i = journal->j_list_bitmap_index; 331 i = journal->j_list_bitmap_index;
332 journal->j_list_bitmap_index = (i + 1) % JOURNAL_NUM_BITMAPS; 332 journal->j_list_bitmap_index = (i + 1) % JOURNAL_NUM_BITMAPS;
333 jb = journal->j_list_bitmap + i; 333 jb = journal->j_list_bitmap + i;
334 if (journal->j_list_bitmap[i].journal_list) { 334 if (journal->j_list_bitmap[i].journal_list) {
335 flush_commit_list(p_s_sb, 335 flush_commit_list(p_s_sb,
336 journal->j_list_bitmap[i]. 336 journal->j_list_bitmap[i].
337 journal_list, 1); 337 journal_list, 1);
338 if (!journal->j_list_bitmap[i].journal_list) { 338 if (!journal->j_list_bitmap[i].journal_list) {
339 break; 339 break;
340 } 340 }
341 } else { 341 } else {
342 break; 342 break;
343 } 343 }
344 } 344 }
345 if (jb->journal_list) { /* double check to make sure it flushed correctly */ 345 if (jb->journal_list) { /* double check to make sure it flushed correctly */
346 return NULL; 346 return NULL;
347 } 347 }
348 jb->journal_list = jl; 348 jb->journal_list = jl;
349 return jb; 349 return jb;
350 } 350 }
351 351
352 /* 352 /*
353 ** allocates a new chunk of X nodes, and links them all together as a list. 353 ** allocates a new chunk of X nodes, and links them all together as a list.
354 ** Uses the cnode->next and cnode->prev pointers 354 ** Uses the cnode->next and cnode->prev pointers
355 ** returns NULL on failure 355 ** returns NULL on failure
356 */ 356 */
357 static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes) 357 static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes)
358 { 358 {
359 struct reiserfs_journal_cnode *head; 359 struct reiserfs_journal_cnode *head;
360 int i; 360 int i;
361 if (num_cnodes <= 0) { 361 if (num_cnodes <= 0) {
362 return NULL; 362 return NULL;
363 } 363 }
364 head = vmalloc(num_cnodes * sizeof(struct reiserfs_journal_cnode)); 364 head = vmalloc(num_cnodes * sizeof(struct reiserfs_journal_cnode));
365 if (!head) { 365 if (!head) {
366 return NULL; 366 return NULL;
367 } 367 }
368 memset(head, 0, num_cnodes * sizeof(struct reiserfs_journal_cnode)); 368 memset(head, 0, num_cnodes * sizeof(struct reiserfs_journal_cnode));
369 head[0].prev = NULL; 369 head[0].prev = NULL;
370 head[0].next = head + 1; 370 head[0].next = head + 1;
371 for (i = 1; i < num_cnodes; i++) { 371 for (i = 1; i < num_cnodes; i++) {
372 head[i].prev = head + (i - 1); 372 head[i].prev = head + (i - 1);
373 head[i].next = head + (i + 1); /* if last one, overwrite it after the loop */ 373 head[i].next = head + (i + 1); /* if last one, overwrite it after the loop */
374 } 374 }
375 head[num_cnodes - 1].next = NULL; 375 head[num_cnodes - 1].next = NULL;
376 return head; 376 return head;
377 } 377 }
378 378
379 /* 379 /*
380 ** pulls a cnode off the free list, or returns NULL on failure 380 ** pulls a cnode off the free list, or returns NULL on failure
381 */ 381 */
382 static struct reiserfs_journal_cnode *get_cnode(struct super_block *p_s_sb) 382 static struct reiserfs_journal_cnode *get_cnode(struct super_block *p_s_sb)
383 { 383 {
384 struct reiserfs_journal_cnode *cn; 384 struct reiserfs_journal_cnode *cn;
385 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 385 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
386 386
387 reiserfs_check_lock_depth(p_s_sb, "get_cnode"); 387 reiserfs_check_lock_depth(p_s_sb, "get_cnode");
388 388
389 if (journal->j_cnode_free <= 0) { 389 if (journal->j_cnode_free <= 0) {
390 return NULL; 390 return NULL;
391 } 391 }
392 journal->j_cnode_used++; 392 journal->j_cnode_used++;
393 journal->j_cnode_free--; 393 journal->j_cnode_free--;
394 cn = journal->j_cnode_free_list; 394 cn = journal->j_cnode_free_list;
395 if (!cn) { 395 if (!cn) {
396 return cn; 396 return cn;
397 } 397 }
398 if (cn->next) { 398 if (cn->next) {
399 cn->next->prev = NULL; 399 cn->next->prev = NULL;
400 } 400 }
401 journal->j_cnode_free_list = cn->next; 401 journal->j_cnode_free_list = cn->next;
402 memset(cn, 0, sizeof(struct reiserfs_journal_cnode)); 402 memset(cn, 0, sizeof(struct reiserfs_journal_cnode));
403 return cn; 403 return cn;
404 } 404 }
405 405
406 /* 406 /*
407 ** returns a cnode to the free list 407 ** returns a cnode to the free list
408 */ 408 */
409 static void free_cnode(struct super_block *p_s_sb, 409 static void free_cnode(struct super_block *p_s_sb,
410 struct reiserfs_journal_cnode *cn) 410 struct reiserfs_journal_cnode *cn)
411 { 411 {
412 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 412 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
413 413
414 reiserfs_check_lock_depth(p_s_sb, "free_cnode"); 414 reiserfs_check_lock_depth(p_s_sb, "free_cnode");
415 415
416 journal->j_cnode_used--; 416 journal->j_cnode_used--;
417 journal->j_cnode_free++; 417 journal->j_cnode_free++;
418 /* memset(cn, 0, sizeof(struct reiserfs_journal_cnode)) ; */ 418 /* memset(cn, 0, sizeof(struct reiserfs_journal_cnode)) ; */
419 cn->next = journal->j_cnode_free_list; 419 cn->next = journal->j_cnode_free_list;
420 if (journal->j_cnode_free_list) { 420 if (journal->j_cnode_free_list) {
421 journal->j_cnode_free_list->prev = cn; 421 journal->j_cnode_free_list->prev = cn;
422 } 422 }
423 cn->prev = NULL; /* not needed with the memset, but I might kill the memset, and forget to do this */ 423 cn->prev = NULL; /* not needed with the memset, but I might kill the memset, and forget to do this */
424 journal->j_cnode_free_list = cn; 424 journal->j_cnode_free_list = cn;
425 } 425 }
426 426
427 static void clear_prepared_bits(struct buffer_head *bh) 427 static void clear_prepared_bits(struct buffer_head *bh)
428 { 428 {
429 clear_buffer_journal_prepared(bh); 429 clear_buffer_journal_prepared(bh);
430 clear_buffer_journal_restore_dirty(bh); 430 clear_buffer_journal_restore_dirty(bh);
431 } 431 }
432 432
433 /* utility function to force a BUG if it is called without the big 433 /* utility function to force a BUG if it is called without the big
434 ** kernel lock held. caller is the string printed just before calling BUG() 434 ** kernel lock held. caller is the string printed just before calling BUG()
435 */ 435 */
436 void reiserfs_check_lock_depth(struct super_block *sb, char *caller) 436 void reiserfs_check_lock_depth(struct super_block *sb, char *caller)
437 { 437 {
438 #ifdef CONFIG_SMP 438 #ifdef CONFIG_SMP
439 if (current->lock_depth < 0) { 439 if (current->lock_depth < 0) {
440 reiserfs_panic(sb, "%s called without kernel lock held", 440 reiserfs_panic(sb, "%s called without kernel lock held",
441 caller); 441 caller);
442 } 442 }
443 #else 443 #else
444 ; 444 ;
445 #endif 445 #endif
446 } 446 }
447 447
448 /* return a cnode with same dev, block number and size in table, or null if not found */ 448 /* return a cnode with same dev, block number and size in table, or null if not found */
449 static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct 449 static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct
450 super_block 450 super_block
451 *sb, 451 *sb,
452 struct 452 struct
453 reiserfs_journal_cnode 453 reiserfs_journal_cnode
454 **table, 454 **table,
455 long bl) 455 long bl)
456 { 456 {
457 struct reiserfs_journal_cnode *cn; 457 struct reiserfs_journal_cnode *cn;
458 cn = journal_hash(table, sb, bl); 458 cn = journal_hash(table, sb, bl);
459 while (cn) { 459 while (cn) {
460 if (cn->blocknr == bl && cn->sb == sb) 460 if (cn->blocknr == bl && cn->sb == sb)
461 return cn; 461 return cn;
462 cn = cn->hnext; 462 cn = cn->hnext;
463 } 463 }
464 return (struct reiserfs_journal_cnode *)0; 464 return (struct reiserfs_journal_cnode *)0;
465 } 465 }
466 466
467 /* 467 /*
468 ** this actually means 'can this block be reallocated yet?'. If you set search_all, a block can only be allocated 468 ** this actually means 'can this block be reallocated yet?'. If you set search_all, a block can only be allocated
469 ** if it is not in the current transaction, was not freed by the current transaction, and has no chance of ever 469 ** if it is not in the current transaction, was not freed by the current transaction, and has no chance of ever
470 ** being overwritten by a replay after crashing. 470 ** being overwritten by a replay after crashing.
471 ** 471 **
472 ** If you don't set search_all, a block can only be allocated if it is not in the current transaction. Since deleting 472 ** If you don't set search_all, a block can only be allocated if it is not in the current transaction. Since deleting
473 ** a block removes it from the current transaction, this case should never happen. If you don't set search_all, make 473 ** a block removes it from the current transaction, this case should never happen. If you don't set search_all, make
474 ** sure you never write the block without logging it. 474 ** sure you never write the block without logging it.
475 ** 475 **
476 ** next_zero_bit is a suggestion about the next block to try for find_forward. 476 ** next_zero_bit is a suggestion about the next block to try for find_forward.
477 ** when bl is rejected because it is set in a journal list bitmap, we search 477 ** when bl is rejected because it is set in a journal list bitmap, we search
478 ** for the next zero bit in the bitmap that rejected bl. Then, we return that 478 ** for the next zero bit in the bitmap that rejected bl. Then, we return that
479 ** through next_zero_bit for find_forward to try. 479 ** through next_zero_bit for find_forward to try.
480 ** 480 **
481 ** Just because we return something in next_zero_bit does not mean we won't 481 ** Just because we return something in next_zero_bit does not mean we won't
482 ** reject it on the next call to reiserfs_in_journal 482 ** reject it on the next call to reiserfs_in_journal
483 ** 483 **
484 */ 484 */
485 int reiserfs_in_journal(struct super_block *p_s_sb, 485 int reiserfs_in_journal(struct super_block *p_s_sb,
486 int bmap_nr, int bit_nr, int search_all, 486 int bmap_nr, int bit_nr, int search_all,
487 b_blocknr_t * next_zero_bit) 487 b_blocknr_t * next_zero_bit)
488 { 488 {
489 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 489 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
490 struct reiserfs_journal_cnode *cn; 490 struct reiserfs_journal_cnode *cn;
491 struct reiserfs_list_bitmap *jb; 491 struct reiserfs_list_bitmap *jb;
492 int i; 492 int i;
493 unsigned long bl; 493 unsigned long bl;
494 494
495 *next_zero_bit = 0; /* always start this at zero. */ 495 *next_zero_bit = 0; /* always start this at zero. */
496 496
497 PROC_INFO_INC(p_s_sb, journal.in_journal); 497 PROC_INFO_INC(p_s_sb, journal.in_journal);
498 /* If we aren't doing a search_all, this is a metablock, and it will be logged before use. 498 /* If we aren't doing a search_all, this is a metablock, and it will be logged before use.
499 ** if we crash before the transaction that freed it commits, this transaction won't 499 ** if we crash before the transaction that freed it commits, this transaction won't
500 ** have committed either, and the block will never be written 500 ** have committed either, and the block will never be written
501 */ 501 */
502 if (search_all) { 502 if (search_all) {
503 for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) { 503 for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
504 PROC_INFO_INC(p_s_sb, journal.in_journal_bitmap); 504 PROC_INFO_INC(p_s_sb, journal.in_journal_bitmap);
505 jb = journal->j_list_bitmap + i; 505 jb = journal->j_list_bitmap + i;
506 if (jb->journal_list && jb->bitmaps[bmap_nr] && 506 if (jb->journal_list && jb->bitmaps[bmap_nr] &&
507 test_bit(bit_nr, 507 test_bit(bit_nr,
508 (unsigned long *)jb->bitmaps[bmap_nr]-> 508 (unsigned long *)jb->bitmaps[bmap_nr]->
509 data)) { 509 data)) {
510 *next_zero_bit = 510 *next_zero_bit =
511 find_next_zero_bit((unsigned long *) 511 find_next_zero_bit((unsigned long *)
512 (jb->bitmaps[bmap_nr]-> 512 (jb->bitmaps[bmap_nr]->
513 data), 513 data),
514 p_s_sb->s_blocksize << 3, 514 p_s_sb->s_blocksize << 3,
515 bit_nr + 1); 515 bit_nr + 1);
516 return 1; 516 return 1;
517 } 517 }
518 } 518 }
519 } 519 }
520 520
521 bl = bmap_nr * (p_s_sb->s_blocksize << 3) + bit_nr; 521 bl = bmap_nr * (p_s_sb->s_blocksize << 3) + bit_nr;
522 /* is it in any old transactions? */ 522 /* is it in any old transactions? */
523 if (search_all 523 if (search_all
524 && (cn = 524 && (cn =
525 get_journal_hash_dev(p_s_sb, journal->j_list_hash_table, bl))) { 525 get_journal_hash_dev(p_s_sb, journal->j_list_hash_table, bl))) {
526 return 1; 526 return 1;
527 } 527 }
528 528
529 /* is it in the current transaction? This should never happen */ 529 /* is it in the current transaction? This should never happen */
530 if ((cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, bl))) { 530 if ((cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, bl))) {
531 BUG(); 531 BUG();
532 return 1; 532 return 1;
533 } 533 }
534 534
535 PROC_INFO_INC(p_s_sb, journal.in_journal_reusable); 535 PROC_INFO_INC(p_s_sb, journal.in_journal_reusable);
536 /* safe for reuse */ 536 /* safe for reuse */
537 return 0; 537 return 0;
538 } 538 }
539 539
540 /* insert cn into table 540 /* insert cn into table
541 */ 541 */
542 static inline void insert_journal_hash(struct reiserfs_journal_cnode **table, 542 static inline void insert_journal_hash(struct reiserfs_journal_cnode **table,
543 struct reiserfs_journal_cnode *cn) 543 struct reiserfs_journal_cnode *cn)
544 { 544 {
545 struct reiserfs_journal_cnode *cn_orig; 545 struct reiserfs_journal_cnode *cn_orig;
546 546
547 cn_orig = journal_hash(table, cn->sb, cn->blocknr); 547 cn_orig = journal_hash(table, cn->sb, cn->blocknr);
548 cn->hnext = cn_orig; 548 cn->hnext = cn_orig;
549 cn->hprev = NULL; 549 cn->hprev = NULL;
550 if (cn_orig) { 550 if (cn_orig) {
551 cn_orig->hprev = cn; 551 cn_orig->hprev = cn;
552 } 552 }
553 journal_hash(table, cn->sb, cn->blocknr) = cn; 553 journal_hash(table, cn->sb, cn->blocknr) = cn;
554 } 554 }
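
insert_journal_hash() is a plain push onto the head of a doubly linked hash chain; the final journal_hash(...) = cn assignment works because journal_hash() is a macro that expands to the bucket slot itself (an lvalue). A minimal userspace model of the insertion, a sketch rather than the kernel types:

    struct node {
        struct node *hnext, *hprev;
        int key;
    };

    /* Push n onto the head of the chain rooted at *bucket, fixing up
     * the old head's back pointer, mirroring insert_journal_hash(). */
    static void hash_push(struct node **bucket, struct node *n)
    {
        n->hnext = *bucket;
        n->hprev = NULL;
        if (*bucket)
            (*bucket)->hprev = n;
        *bucket = n;
    }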
555 555
556 /* lock the current transaction */ 556 /* lock the current transaction */
557 static inline void lock_journal(struct super_block *p_s_sb) 557 static inline void lock_journal(struct super_block *p_s_sb)
558 { 558 {
559 PROC_INFO_INC(p_s_sb, journal.lock_journal); 559 PROC_INFO_INC(p_s_sb, journal.lock_journal);
560 down(&SB_JOURNAL(p_s_sb)->j_lock); 560 down(&SB_JOURNAL(p_s_sb)->j_lock);
561 } 561 }
562 562
563 /* unlock the current transaction */ 563 /* unlock the current transaction */
564 static inline void unlock_journal(struct super_block *p_s_sb) 564 static inline void unlock_journal(struct super_block *p_s_sb)
565 { 565 {
566 up(&SB_JOURNAL(p_s_sb)->j_lock); 566 up(&SB_JOURNAL(p_s_sb)->j_lock);
567 } 567 }
568 568
569 static inline void get_journal_list(struct reiserfs_journal_list *jl) 569 static inline void get_journal_list(struct reiserfs_journal_list *jl)
570 { 570 {
571 jl->j_refcount++; 571 jl->j_refcount++;
572 } 572 }
573 573
574 static inline void put_journal_list(struct super_block *s, 574 static inline void put_journal_list(struct super_block *s,
575 struct reiserfs_journal_list *jl) 575 struct reiserfs_journal_list *jl)
576 { 576 {
577 if (jl->j_refcount < 1) { 577 if (jl->j_refcount < 1) {
578 reiserfs_panic(s, "trans id %lu, refcount at %d", 578 reiserfs_panic(s, "trans id %lu, refcount at %d",
579 jl->j_trans_id, jl->j_refcount); 579 jl->j_trans_id, jl->j_refcount);
580 } 580 }
581 if (--jl->j_refcount == 0) 581 if (--jl->j_refcount == 0)
582 kfree(jl); 582 kfree(jl);
583 } 583 }
584 584
585 /* 585 /*
586 ** this used to be much more involved, and I'm keeping it just in case things get ugly again. 586 ** this used to be much more involved, and I'm keeping it just in case things get ugly again.
587 ** it gets called by flush_commit_list, and cleans up any data stored about blocks freed during a 587 ** it gets called by flush_commit_list, and cleans up any data stored about blocks freed during a
588 ** transaction. 588 ** transaction.
589 */ 589 */
590 static void cleanup_freed_for_journal_list(struct super_block *p_s_sb, 590 static void cleanup_freed_for_journal_list(struct super_block *p_s_sb,
591 struct reiserfs_journal_list *jl) 591 struct reiserfs_journal_list *jl)
592 { 592 {
593 593
594 struct reiserfs_list_bitmap *jb = jl->j_list_bitmap; 594 struct reiserfs_list_bitmap *jb = jl->j_list_bitmap;
595 if (jb) { 595 if (jb) {
596 cleanup_bitmap_list(p_s_sb, jb); 596 cleanup_bitmap_list(p_s_sb, jb);
597 } 597 }
598 jl->j_list_bitmap->journal_list = NULL; 598 jl->j_list_bitmap->journal_list = NULL;
599 jl->j_list_bitmap = NULL; 599 jl->j_list_bitmap = NULL;
600 } 600 }
601 601
602 static int journal_list_still_alive(struct super_block *s, 602 static int journal_list_still_alive(struct super_block *s,
603 unsigned long trans_id) 603 unsigned long trans_id)
604 { 604 {
605 struct reiserfs_journal *journal = SB_JOURNAL(s); 605 struct reiserfs_journal *journal = SB_JOURNAL(s);
606 struct list_head *entry = &journal->j_journal_list; 606 struct list_head *entry = &journal->j_journal_list;
607 struct reiserfs_journal_list *jl; 607 struct reiserfs_journal_list *jl;
608 608
609 if (!list_empty(entry)) { 609 if (!list_empty(entry)) {
610 jl = JOURNAL_LIST_ENTRY(entry->next); 610 jl = JOURNAL_LIST_ENTRY(entry->next);
611 if (jl->j_trans_id <= trans_id) { 611 if (jl->j_trans_id <= trans_id) {
612 return 1; 612 return 1;
613 } 613 }
614 } 614 }
615 return 0; 615 return 0;
616 } 616 }
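
journal_list_still_alive() only has to look at the head of j_journal_list because, as the surrounding code suggests, transaction ids increase monotonically and lists are removed oldest-first: a target id is still on the list exactly when it is at least the id at the head. A sketch of that invariant with hypothetical ids:

    /* With head id H (the oldest surviving list) and monotonically
     * increasing, oldest-first-flushed ids, an issued id T is still
     * alive iff T >= H.  E.g. H = 40: T = 39 is already flushed,
     * T = 41 is still alive. */
    static int still_alive(unsigned long head_id, unsigned long t)
    {
        return t >= head_id;
    }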
617 617
618 static void reiserfs_end_buffer_io_sync(struct buffer_head *bh, int uptodate) 618 static void reiserfs_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
619 { 619 {
620 char b[BDEVNAME_SIZE]; 620 char b[BDEVNAME_SIZE];
621 621
622 if (buffer_journaled(bh)) { 622 if (buffer_journaled(bh)) {
623 reiserfs_warning(NULL, 623 reiserfs_warning(NULL,
624 "clm-2084: pinned buffer %lu:%s sent to disk", 624 "clm-2084: pinned buffer %lu:%s sent to disk",
625 bh->b_blocknr, bdevname(bh->b_bdev, b)); 625 bh->b_blocknr, bdevname(bh->b_bdev, b));
626 } 626 }
627 if (uptodate) 627 if (uptodate)
628 set_buffer_uptodate(bh); 628 set_buffer_uptodate(bh);
629 else 629 else
630 clear_buffer_uptodate(bh); 630 clear_buffer_uptodate(bh);
631 unlock_buffer(bh); 631 unlock_buffer(bh);
632 put_bh(bh); 632 put_bh(bh);
633 } 633 }
634 634
635 static void reiserfs_end_ordered_io(struct buffer_head *bh, int uptodate) 635 static void reiserfs_end_ordered_io(struct buffer_head *bh, int uptodate)
636 { 636 {
637 if (uptodate) 637 if (uptodate)
638 set_buffer_uptodate(bh); 638 set_buffer_uptodate(bh);
639 else 639 else
640 clear_buffer_uptodate(bh); 640 clear_buffer_uptodate(bh);
641 unlock_buffer(bh); 641 unlock_buffer(bh);
642 put_bh(bh); 642 put_bh(bh);
643 } 643 }
644 644
645 static void submit_logged_buffer(struct buffer_head *bh) 645 static void submit_logged_buffer(struct buffer_head *bh)
646 { 646 {
647 get_bh(bh); 647 get_bh(bh);
648 bh->b_end_io = reiserfs_end_buffer_io_sync; 648 bh->b_end_io = reiserfs_end_buffer_io_sync;
649 clear_buffer_journal_new(bh); 649 clear_buffer_journal_new(bh);
650 clear_buffer_dirty(bh); 650 clear_buffer_dirty(bh);
651 if (!test_clear_buffer_journal_test(bh)) 651 if (!test_clear_buffer_journal_test(bh))
652 BUG(); 652 BUG();
653 if (!buffer_uptodate(bh)) 653 if (!buffer_uptodate(bh))
654 BUG(); 654 BUG();
655 submit_bh(WRITE, bh); 655 submit_bh(WRITE, bh);
656 } 656 }
657 657
658 static void submit_ordered_buffer(struct buffer_head *bh) 658 static void submit_ordered_buffer(struct buffer_head *bh)
659 { 659 {
660 get_bh(bh); 660 get_bh(bh);
661 bh->b_end_io = reiserfs_end_ordered_io; 661 bh->b_end_io = reiserfs_end_ordered_io;
662 clear_buffer_dirty(bh); 662 clear_buffer_dirty(bh);
663 if (!buffer_uptodate(bh)) 663 if (!buffer_uptodate(bh))
664 BUG(); 664 BUG();
665 submit_bh(WRITE, bh); 665 submit_bh(WRITE, bh);
666 } 666 }
667 667
668 static int submit_barrier_buffer(struct buffer_head *bh) 668 static int submit_barrier_buffer(struct buffer_head *bh)
669 { 669 {
670 get_bh(bh); 670 get_bh(bh);
671 bh->b_end_io = reiserfs_end_ordered_io; 671 bh->b_end_io = reiserfs_end_ordered_io;
672 clear_buffer_dirty(bh); 672 clear_buffer_dirty(bh);
673 if (!buffer_uptodate(bh)) 673 if (!buffer_uptodate(bh))
674 BUG(); 674 BUG();
675 return submit_bh(WRITE_BARRIER, bh); 675 return submit_bh(WRITE_BARRIER, bh);
676 } 676 }
677 677
678 static void check_barrier_completion(struct super_block *s, 678 static void check_barrier_completion(struct super_block *s,
679 struct buffer_head *bh) 679 struct buffer_head *bh)
680 { 680 {
681 if (buffer_eopnotsupp(bh)) { 681 if (buffer_eopnotsupp(bh)) {
682 clear_buffer_eopnotsupp(bh); 682 clear_buffer_eopnotsupp(bh);
683 disable_barrier(s); 683 disable_barrier(s);
684 set_buffer_uptodate(bh); 684 set_buffer_uptodate(bh);
685 set_buffer_dirty(bh); 685 set_buffer_dirty(bh);
686 sync_dirty_buffer(bh); 686 sync_dirty_buffer(bh);
687 } 687 }
688 } 688 }
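
submit_barrier_buffer() and check_barrier_completion() together implement a try-then-degrade pattern: attempt a WRITE_BARRIER, and if the device reports -EOPNOTSUPP, permanently disable barriers for this mount and redo the write as an ordinary synchronous write. A self-contained model of the same shape; the struct fields and helpers here are stand-ins, not kernel API:

    #include <errno.h>

    struct dev { int barriers_enabled; };
    struct buf { int dirty; };

    /* Stand-in for submit_barrier_buffer(): fails with -EOPNOTSUPP
     * when the device cannot order writes. */
    static int submit_ordered(struct dev *d, struct buf *b)
    {
        if (!d->barriers_enabled)
            return -EOPNOTSUPP;
        b->dirty = 0;   /* pretend the ordered write completed */
        return 0;
    }

    /* Stand-in for the set_buffer_dirty() + sync_dirty_buffer() path. */
    static int submit_and_wait(struct dev *d, struct buf *b)
    {
        b->dirty = 0;
        return 0;
    }

    /* Try the barrier once; on EOPNOTSUPP remember that the device
     * has no barrier support and retry unordered, as the kernel code
     * above does via disable_barrier(). */
    static int write_with_fallback(struct dev *d, struct buf *b)
    {
        int ret = submit_ordered(d, b);
        if (ret == -EOPNOTSUPP) {
            d->barriers_enabled = 0;
            ret = submit_and_wait(d, b);
        }
        return ret;
    }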
689 689
690 #define CHUNK_SIZE 32 690 #define CHUNK_SIZE 32
691 struct buffer_chunk { 691 struct buffer_chunk {
692 struct buffer_head *bh[CHUNK_SIZE]; 692 struct buffer_head *bh[CHUNK_SIZE];
693 int nr; 693 int nr;
694 }; 694 };
695 695
696 static void write_chunk(struct buffer_chunk *chunk) 696 static void write_chunk(struct buffer_chunk *chunk)
697 { 697 {
698 int i; 698 int i;
699 get_fs_excl(); 699 get_fs_excl();
700 for (i = 0; i < chunk->nr; i++) { 700 for (i = 0; i < chunk->nr; i++) {
701 submit_logged_buffer(chunk->bh[i]); 701 submit_logged_buffer(chunk->bh[i]);
702 } 702 }
703 chunk->nr = 0; 703 chunk->nr = 0;
704 put_fs_excl(); 704 put_fs_excl();
705 } 705 }
706 706
707 static void write_ordered_chunk(struct buffer_chunk *chunk) 707 static void write_ordered_chunk(struct buffer_chunk *chunk)
708 { 708 {
709 int i; 709 int i;
710 get_fs_excl(); 710 get_fs_excl();
711 for (i = 0; i < chunk->nr; i++) { 711 for (i = 0; i < chunk->nr; i++) {
712 submit_ordered_buffer(chunk->bh[i]); 712 submit_ordered_buffer(chunk->bh[i]);
713 } 713 }
714 chunk->nr = 0; 714 chunk->nr = 0;
715 put_fs_excl(); 715 put_fs_excl();
716 } 716 }
717 717
718 static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh, 718 static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh,
719 spinlock_t * lock, void (fn) (struct buffer_chunk *)) 719 spinlock_t * lock, void (fn) (struct buffer_chunk *))
720 { 720 {
721 int ret = 0; 721 int ret = 0;
722 if (chunk->nr >= CHUNK_SIZE) 722 if (chunk->nr >= CHUNK_SIZE)
723 BUG(); 723 BUG();
724 chunk->bh[chunk->nr++] = bh; 724 chunk->bh[chunk->nr++] = bh;
725 if (chunk->nr >= CHUNK_SIZE) { 725 if (chunk->nr >= CHUNK_SIZE) {
726 ret = 1; 726 ret = 1;
727 if (lock) 727 if (lock)
728 spin_unlock(lock); 728 spin_unlock(lock);
729 fn(chunk); 729 fn(chunk);
730 if (lock) 730 if (lock)
731 spin_lock(lock); 731 spin_lock(lock);
732 } 732 }
733 return ret; 733 return ret;
734 } 734 }
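
add_to_chunk() is the batching primitive this patch leans on: callers accumulate up to CHUNK_SIZE buffers under a spinlock, and only when the chunk fills is the lock dropped around the actual submission, keeping slow I/O out of the locked region. A minimal userspace model, with a pthread mutex standing in for the spinlock:

    #include <pthread.h>

    #define CHUNK 32

    struct chunk { void *item[CHUNK]; int nr; };

    /* Stands in for write_chunk()/write_ordered_chunk(): submit
     * item[0..nr-1], always called with the lock dropped. */
    static void flush(struct chunk *c)
    {
        c->nr = 0;
    }

    /* Queue one item; when the chunk fills, drop the lock around the
     * flush so submission never runs with the lock held.  Returns 1
     * if a flush happened (so callers know the lock was released). */
    static int add(struct chunk *c, void *it, pthread_mutex_t *lock)
    {
        int flushed = 0;
        c->item[c->nr++] = it;
        if (c->nr >= CHUNK) {
            flushed = 1;
            pthread_mutex_unlock(lock);
            flush(c);
            pthread_mutex_lock(lock);
        }
        return flushed;
    }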
735 735
736 static atomic_t nr_reiserfs_jh = ATOMIC_INIT(0); 736 static atomic_t nr_reiserfs_jh = ATOMIC_INIT(0);
737 static struct reiserfs_jh *alloc_jh(void) 737 static struct reiserfs_jh *alloc_jh(void)
738 { 738 {
739 struct reiserfs_jh *jh; 739 struct reiserfs_jh *jh;
740 while (1) { 740 while (1) {
741 jh = kmalloc(sizeof(*jh), GFP_NOFS); 741 jh = kmalloc(sizeof(*jh), GFP_NOFS);
742 if (jh) { 742 if (jh) {
743 atomic_inc(&nr_reiserfs_jh); 743 atomic_inc(&nr_reiserfs_jh);
744 return jh; 744 return jh;
745 } 745 }
746 yield(); 746 yield();
747 } 747 }
748 } 748 }
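
alloc_jh() is not allowed to fail, so it loops on kmalloc(GFP_NOFS) and yields between attempts; GFP_NOFS matters because a normal allocation could recurse into filesystem writeback while the journal is busy. A userspace analogue of the retry shape (malloc() standing in for kmalloc()):

    #include <sched.h>
    #include <stdlib.h>

    /* Loop until the allocation succeeds, yielding the CPU between
     * attempts so whoever can free memory gets a chance to run. */
    static void *alloc_retry(size_t sz)
    {
        void *p;
        while ((p = malloc(sz)) == NULL)
            sched_yield();
        return p;
    }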
749 749
750 /* 750 /*
751 * we want to free the jh when the buffer has been written 751 * we want to free the jh when the buffer has been written
752 * and waited on 752 * and waited on
753 */ 753 */
754 void reiserfs_free_jh(struct buffer_head *bh) 754 void reiserfs_free_jh(struct buffer_head *bh)
755 { 755 {
756 struct reiserfs_jh *jh; 756 struct reiserfs_jh *jh;
757 757
758 jh = bh->b_private; 758 jh = bh->b_private;
759 if (jh) { 759 if (jh) {
760 bh->b_private = NULL; 760 bh->b_private = NULL;
761 jh->bh = NULL; 761 jh->bh = NULL;
762 list_del_init(&jh->list); 762 list_del_init(&jh->list);
763 kfree(jh); 763 kfree(jh);
764 if (atomic_read(&nr_reiserfs_jh) <= 0) 764 if (atomic_read(&nr_reiserfs_jh) <= 0)
765 BUG(); 765 BUG();
766 atomic_dec(&nr_reiserfs_jh); 766 atomic_dec(&nr_reiserfs_jh);
767 put_bh(bh); 767 put_bh(bh);
768 } 768 }
769 } 769 }
770 770
771 static inline int __add_jh(struct reiserfs_journal *j, struct buffer_head *bh, 771 static inline int __add_jh(struct reiserfs_journal *j, struct buffer_head *bh,
772 int tail) 772 int tail)
773 { 773 {
774 struct reiserfs_jh *jh; 774 struct reiserfs_jh *jh;
775 775
776 if (bh->b_private) { 776 if (bh->b_private) {
777 spin_lock(&j->j_dirty_buffers_lock); 777 spin_lock(&j->j_dirty_buffers_lock);
778 if (!bh->b_private) { 778 if (!bh->b_private) {
779 spin_unlock(&j->j_dirty_buffers_lock); 779 spin_unlock(&j->j_dirty_buffers_lock);
780 goto no_jh; 780 goto no_jh;
781 } 781 }
782 jh = bh->b_private; 782 jh = bh->b_private;
783 list_del_init(&jh->list); 783 list_del_init(&jh->list);
784 } else { 784 } else {
785 no_jh: 785 no_jh:
786 get_bh(bh); 786 get_bh(bh);
787 jh = alloc_jh(); 787 jh = alloc_jh();
788 spin_lock(&j->j_dirty_buffers_lock); 788 spin_lock(&j->j_dirty_buffers_lock);
789 /* buffer must be locked for __add_jh, should be able to have 789 /* buffer must be locked for __add_jh, should be able to have
790 * two adds at the same time 790 * two adds at the same time
791 */ 791 */
792 if (bh->b_private) 792 if (bh->b_private)
793 BUG(); 793 BUG();
794 jh->bh = bh; 794 jh->bh = bh;
795 bh->b_private = jh; 795 bh->b_private = jh;
796 } 796 }
797 jh->jl = j->j_current_jl; 797 jh->jl = j->j_current_jl;
798 if (tail) 798 if (tail)
799 list_add_tail(&jh->list, &jh->jl->j_tail_bh_list); 799 list_add_tail(&jh->list, &jh->jl->j_tail_bh_list);
800 else { 800 else {
801 list_add_tail(&jh->list, &jh->jl->j_bh_list); 801 list_add_tail(&jh->list, &jh->jl->j_bh_list);
802 } 802 }
803 spin_unlock(&j->j_dirty_buffers_lock); 803 spin_unlock(&j->j_dirty_buffers_lock);
804 return 0; 804 return 0;
805 } 805 }
806 806
807 int reiserfs_add_tail_list(struct inode *inode, struct buffer_head *bh) 807 int reiserfs_add_tail_list(struct inode *inode, struct buffer_head *bh)
808 { 808 {
809 return __add_jh(SB_JOURNAL(inode->i_sb), bh, 1); 809 return __add_jh(SB_JOURNAL(inode->i_sb), bh, 1);
810 } 810 }
811 int reiserfs_add_ordered_list(struct inode *inode, struct buffer_head *bh) 811 int reiserfs_add_ordered_list(struct inode *inode, struct buffer_head *bh)
812 { 812 {
813 return __add_jh(SB_JOURNAL(inode->i_sb), bh, 0); 813 return __add_jh(SB_JOURNAL(inode->i_sb), bh, 0);
814 } 814 }
815 815
816 #define JH_ENTRY(l) list_entry((l), struct reiserfs_jh, list) 816 #define JH_ENTRY(l) list_entry((l), struct reiserfs_jh, list)
817 static int write_ordered_buffers(spinlock_t * lock, 817 static int write_ordered_buffers(spinlock_t * lock,
818 struct reiserfs_journal *j, 818 struct reiserfs_journal *j,
819 struct reiserfs_journal_list *jl, 819 struct reiserfs_journal_list *jl,
820 struct list_head *list) 820 struct list_head *list)
821 { 821 {
822 struct buffer_head *bh; 822 struct buffer_head *bh;
823 struct reiserfs_jh *jh; 823 struct reiserfs_jh *jh;
824 int ret = j->j_errno; 824 int ret = j->j_errno;
825 struct buffer_chunk chunk; 825 struct buffer_chunk chunk;
826 struct list_head tmp; 826 struct list_head tmp;
827 INIT_LIST_HEAD(&tmp); 827 INIT_LIST_HEAD(&tmp);
828 828
829 chunk.nr = 0; 829 chunk.nr = 0;
830 spin_lock(lock); 830 spin_lock(lock);
831 while (!list_empty(list)) { 831 while (!list_empty(list)) {
832 jh = JH_ENTRY(list->next); 832 jh = JH_ENTRY(list->next);
833 bh = jh->bh; 833 bh = jh->bh;
834 get_bh(bh); 834 get_bh(bh);
835 if (test_set_buffer_locked(bh)) { 835 if (test_set_buffer_locked(bh)) {
836 if (!buffer_dirty(bh)) { 836 if (!buffer_dirty(bh)) {
837 list_del_init(&jh->list); 837 list_del_init(&jh->list);
838 list_add(&jh->list, &tmp); 838 list_add(&jh->list, &tmp);
839 goto loop_next; 839 goto loop_next;
840 } 840 }
841 spin_unlock(lock); 841 spin_unlock(lock);
842 if (chunk.nr) 842 if (chunk.nr)
843 write_ordered_chunk(&chunk); 843 write_ordered_chunk(&chunk);
844 wait_on_buffer(bh); 844 wait_on_buffer(bh);
845 cond_resched(); 845 cond_resched();
846 spin_lock(lock); 846 spin_lock(lock);
847 goto loop_next; 847 goto loop_next;
848 } 848 }
849 if (buffer_dirty(bh)) { 849 if (buffer_dirty(bh)) {
850 list_del_init(&jh->list); 850 list_del_init(&jh->list);
851 list_add(&jh->list, &tmp); 851 list_add(&jh->list, &tmp);
852 add_to_chunk(&chunk, bh, lock, write_ordered_chunk); 852 add_to_chunk(&chunk, bh, lock, write_ordered_chunk);
853 } else { 853 } else {
854 reiserfs_free_jh(bh); 854 reiserfs_free_jh(bh);
855 unlock_buffer(bh); 855 unlock_buffer(bh);
856 } 856 }
857 loop_next: 857 loop_next:
858 put_bh(bh); 858 put_bh(bh);
859 cond_resched_lock(lock); 859 cond_resched_lock(lock);
860 } 860 }
861 if (chunk.nr) { 861 if (chunk.nr) {
862 spin_unlock(lock); 862 spin_unlock(lock);
863 write_ordered_chunk(&chunk); 863 write_ordered_chunk(&chunk);
864 spin_lock(lock); 864 spin_lock(lock);
865 } 865 }
866 while (!list_empty(&tmp)) { 866 while (!list_empty(&tmp)) {
867 jh = JH_ENTRY(tmp.prev); 867 jh = JH_ENTRY(tmp.prev);
868 bh = jh->bh; 868 bh = jh->bh;
869 get_bh(bh); 869 get_bh(bh);
870 reiserfs_free_jh(bh); 870 reiserfs_free_jh(bh);
871 871
872 if (buffer_locked(bh)) { 872 if (buffer_locked(bh)) {
873 spin_unlock(lock); 873 spin_unlock(lock);
874 wait_on_buffer(bh); 874 wait_on_buffer(bh);
875 spin_lock(lock); 875 spin_lock(lock);
876 } 876 }
877 if (!buffer_uptodate(bh)) { 877 if (!buffer_uptodate(bh)) {
878 ret = -EIO; 878 ret = -EIO;
879 } 879 }
880 /* ugly interaction with invalidatepage here. 880 /* ugly interaction with invalidatepage here.
881 * reiserfs_invalidate_page will pin any buffer that has a valid 881 * reiserfs_invalidate_page will pin any buffer that has a valid
882 * journal head from an older transaction. If someone else sets 882 * journal head from an older transaction. If someone else sets
883 * our buffer dirty after we write it in the first loop, and 883 * our buffer dirty after we write it in the first loop, and
884 * then someone truncates the page away, nobody will ever write 884 * then someone truncates the page away, nobody will ever write
885 * the buffer. We're safe if we write the page one last time 885 * the buffer. We're safe if we write the page one last time
886 * after freeing the journal header. 886 * after freeing the journal header.
887 */ 887 */
888 if (buffer_dirty(bh) && unlikely(bh->b_page->mapping == NULL)) { 888 if (buffer_dirty(bh) && unlikely(bh->b_page->mapping == NULL)) {
889 spin_unlock(lock); 889 spin_unlock(lock);
890 ll_rw_block(WRITE, 1, &bh); 890 ll_rw_block(WRITE, 1, &bh);
891 spin_lock(lock); 891 spin_lock(lock);
892 } 892 }
893 put_bh(bh); 893 put_bh(bh);
894 cond_resched_lock(lock); 894 cond_resched_lock(lock);
895 } 895 }
896 spin_unlock(lock); 896 spin_unlock(lock);
897 return ret; 897 return ret;
898 } 898 }
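
write_ordered_buffers() is a two-pass writeback loop: pass one walks the list under the lock, moving dirty buffers onto the private tmp list and submitting them in chunks; pass two walks tmp, waits on each buffer, and folds any failure into a single -EIO. A compressed sketch of that shape, with trivial stand-ins for the buffer-head I/O:

    #include <errno.h>

    struct wbuf { int dirty; int error; };

    static void submit(struct wbuf *b) { b->dirty = 0; }  /* stand-in */
    static int wait_done(struct wbuf *b) { return b->error ? -EIO : 0; }

    /* Submit everything first so writes can overlap at the device,
     * then make a second pass to wait and collapse failures to -EIO. */
    static int write_all(struct wbuf *v, int n)
    {
        int i, ret = 0;
        for (i = 0; i < n; i++)
            if (v[i].dirty)
                submit(&v[i]);
        for (i = 0; i < n; i++)
            if (wait_done(&v[i]))
                ret = -EIO;
        return ret;
    }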
899 899
900 static int flush_older_commits(struct super_block *s, 900 static int flush_older_commits(struct super_block *s,
901 struct reiserfs_journal_list *jl) 901 struct reiserfs_journal_list *jl)
902 { 902 {
903 struct reiserfs_journal *journal = SB_JOURNAL(s); 903 struct reiserfs_journal *journal = SB_JOURNAL(s);
904 struct reiserfs_journal_list *other_jl; 904 struct reiserfs_journal_list *other_jl;
905 struct reiserfs_journal_list *first_jl; 905 struct reiserfs_journal_list *first_jl;
906 struct list_head *entry; 906 struct list_head *entry;
907 unsigned long trans_id = jl->j_trans_id; 907 unsigned long trans_id = jl->j_trans_id;
908 unsigned long other_trans_id; 908 unsigned long other_trans_id;
909 unsigned long first_trans_id; 909 unsigned long first_trans_id;
910 910
911 find_first: 911 find_first:
912 /* 912 /*
913 * first we walk backwards to find the oldest uncommitted transaction 913 * first we walk backwards to find the oldest uncommitted transaction
914 */ 914 */
915 first_jl = jl; 915 first_jl = jl;
916 entry = jl->j_list.prev; 916 entry = jl->j_list.prev;
917 while (1) { 917 while (1) {
918 other_jl = JOURNAL_LIST_ENTRY(entry); 918 other_jl = JOURNAL_LIST_ENTRY(entry);
919 if (entry == &journal->j_journal_list || 919 if (entry == &journal->j_journal_list ||
920 atomic_read(&other_jl->j_older_commits_done)) 920 atomic_read(&other_jl->j_older_commits_done))
921 break; 921 break;
922 922
923 first_jl = other_jl; 923 first_jl = other_jl;
924 entry = other_jl->j_list.prev; 924 entry = other_jl->j_list.prev;
925 } 925 }
926 926
927 /* if we didn't find any older uncommitted transactions, return now */ 927 /* if we didn't find any older uncommitted transactions, return now */
928 if (first_jl == jl) { 928 if (first_jl == jl) {
929 return 0; 929 return 0;
930 } 930 }
931 931
932 first_trans_id = first_jl->j_trans_id; 932 first_trans_id = first_jl->j_trans_id;
933 933
934 entry = &first_jl->j_list; 934 entry = &first_jl->j_list;
935 while (1) { 935 while (1) {
936 other_jl = JOURNAL_LIST_ENTRY(entry); 936 other_jl = JOURNAL_LIST_ENTRY(entry);
937 other_trans_id = other_jl->j_trans_id; 937 other_trans_id = other_jl->j_trans_id;
938 938
939 if (other_trans_id < trans_id) { 939 if (other_trans_id < trans_id) {
940 if (atomic_read(&other_jl->j_commit_left) != 0) { 940 if (atomic_read(&other_jl->j_commit_left) != 0) {
941 flush_commit_list(s, other_jl, 0); 941 flush_commit_list(s, other_jl, 0);
942 942
943 /* list we were called with is gone, return */ 943 /* list we were called with is gone, return */
944 if (!journal_list_still_alive(s, trans_id)) 944 if (!journal_list_still_alive(s, trans_id))
945 return 1; 945 return 1;
946 946
947 /* the one we just flushed is gone, this means all 947 /* the one we just flushed is gone, this means all
948 * older lists are also gone, so first_jl is no longer 948 * older lists are also gone, so first_jl is no longer
949 * valid either. Go back to the beginning. 949 * valid either. Go back to the beginning.
950 */ 950 */
951 if (!journal_list_still_alive 951 if (!journal_list_still_alive
952 (s, other_trans_id)) { 952 (s, other_trans_id)) {
953 goto find_first; 953 goto find_first;
954 } 954 }
955 } 955 }
956 entry = entry->next; 956 entry = entry->next;
957 if (entry == &journal->j_journal_list) 957 if (entry == &journal->j_journal_list)
958 return 0; 958 return 0;
959 } else { 959 } else {
960 return 0; 960 return 0;
961 } 961 }
962 } 962 }
963 return 0; 963 return 0;
964 } 964 }
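
The goto find_first above is a restart-on-invalidation pattern: flushing can free journal lists (including first_jl itself), so whenever a liveness check fails, the walk starts over from the caller's list instead of resuming through possibly freed entries. Schematically, with hypothetical helpers declared but not defined:

    struct jlist { struct jlist *next; unsigned long id; };

    /* Hypothetical: flush_one() may free n and older entries; a
     * nonzero return means our saved pointers can't be trusted. */
    extern int flush_one(struct jlist *n);
    extern struct jlist *oldest(struct jlist *anchor);

    static void flush_older(struct jlist *anchor)
    {
        struct jlist *n;
    restart:
        for (n = oldest(anchor); n && n != anchor; n = n->next) {
            if (flush_one(n))
                goto restart;  /* re-derive pointers from scratch */
        }
    }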
965 int reiserfs_async_progress_wait(struct super_block *s) 965 int reiserfs_async_progress_wait(struct super_block *s)
966 { 966 {
967 DEFINE_WAIT(wait); 967 DEFINE_WAIT(wait);
968 struct reiserfs_journal *j = SB_JOURNAL(s); 968 struct reiserfs_journal *j = SB_JOURNAL(s);
969 if (atomic_read(&j->j_async_throttle)) 969 if (atomic_read(&j->j_async_throttle))
970 blk_congestion_wait(WRITE, HZ / 10); 970 blk_congestion_wait(WRITE, HZ / 10);
971 return 0; 971 return 0;
972 } 972 }
973 973
974 /* 974 /*
975 ** if this journal list still has commit blocks unflushed, send them to disk. 975 ** if this journal list still has commit blocks unflushed, send them to disk.
976 ** 976 **
977 ** log areas must be flushed in order (transaction 2 can't commit before transaction 1) 977 ** log areas must be flushed in order (transaction 2 can't commit before transaction 1)
978 ** Before the commit block can be written, every other log block must be safely on disk 978 ** Before the commit block can be written, every other log block must be safely on disk
979 ** 979 **
980 */ 980 */
981 static int flush_commit_list(struct super_block *s, 981 static int flush_commit_list(struct super_block *s,
982 struct reiserfs_journal_list *jl, int flushall) 982 struct reiserfs_journal_list *jl, int flushall)
983 { 983 {
984 int i; 984 int i;
985 int bn; 985 int bn;
986 struct buffer_head *tbh = NULL; 986 struct buffer_head *tbh = NULL;
987 unsigned long trans_id = jl->j_trans_id; 987 unsigned long trans_id = jl->j_trans_id;
988 struct reiserfs_journal *journal = SB_JOURNAL(s); 988 struct reiserfs_journal *journal = SB_JOURNAL(s);
989 int barrier = 0; 989 int barrier = 0;
990 int retval = 0; 990 int retval = 0;
991 int write_len;
991 992
992 reiserfs_check_lock_depth(s, "flush_commit_list"); 993 reiserfs_check_lock_depth(s, "flush_commit_list");
993 994
994 if (atomic_read(&jl->j_older_commits_done)) { 995 if (atomic_read(&jl->j_older_commits_done)) {
995 return 0; 996 return 0;
996 } 997 }
997 998
998 get_fs_excl(); 999 get_fs_excl();
999 1000
1000 /* before we can put our commit blocks on disk, we have to make sure everyone older than 1001 /* before we can put our commit blocks on disk, we have to make sure everyone older than
1001 ** us is on disk too 1002 ** us is on disk too
1002 */ 1003 */
1003 BUG_ON(jl->j_len <= 0); 1004 BUG_ON(jl->j_len <= 0);
1004 BUG_ON(trans_id == journal->j_trans_id); 1005 BUG_ON(trans_id == journal->j_trans_id);
1005 1006
1006 get_journal_list(jl); 1007 get_journal_list(jl);
1007 if (flushall) { 1008 if (flushall) {
1008 if (flush_older_commits(s, jl) == 1) { 1009 if (flush_older_commits(s, jl) == 1) {
1009 /* list disappeared during flush_older_commits. return */ 1010 /* list disappeared during flush_older_commits. return */
1010 goto put_jl; 1011 goto put_jl;
1011 } 1012 }
1012 } 1013 }
1013 1014
1014 /* make sure nobody is trying to flush this one at the same time */ 1015 /* make sure nobody is trying to flush this one at the same time */
1015 down(&jl->j_commit_lock); 1016 down(&jl->j_commit_lock);
1016 if (!journal_list_still_alive(s, trans_id)) { 1017 if (!journal_list_still_alive(s, trans_id)) {
1017 up(&jl->j_commit_lock); 1018 up(&jl->j_commit_lock);
1018 goto put_jl; 1019 goto put_jl;
1019 } 1020 }
1020 BUG_ON(jl->j_trans_id == 0); 1021 BUG_ON(jl->j_trans_id == 0);
1021 1022
1022 /* this commit is done, exit */ 1023 /* this commit is done, exit */
1023 if (atomic_read(&(jl->j_commit_left)) <= 0) { 1024 if (atomic_read(&(jl->j_commit_left)) <= 0) {
1024 if (flushall) { 1025 if (flushall) {
1025 atomic_set(&(jl->j_older_commits_done), 1); 1026 atomic_set(&(jl->j_older_commits_done), 1);
1026 } 1027 }
1027 up(&jl->j_commit_lock); 1028 up(&jl->j_commit_lock);
1028 goto put_jl; 1029 goto put_jl;
1029 } 1030 }
1030 1031
1031 if (!list_empty(&jl->j_bh_list)) { 1032 if (!list_empty(&jl->j_bh_list)) {
1032 unlock_kernel(); 1033 unlock_kernel();
1033 write_ordered_buffers(&journal->j_dirty_buffers_lock, 1034 write_ordered_buffers(&journal->j_dirty_buffers_lock,
1034 journal, jl, &jl->j_bh_list); 1035 journal, jl, &jl->j_bh_list);
1035 lock_kernel(); 1036 lock_kernel();
1036 } 1037 }
1037 BUG_ON(!list_empty(&jl->j_bh_list)); 1038 BUG_ON(!list_empty(&jl->j_bh_list));
1038 /* 1039 /*
1039 * for the description block and all the log blocks, submit any buffers 1040 * for the description block and all the log blocks, submit any buffers
1040 * that haven't already reached the disk 1041 * that haven't already reached the disk. Try to write at least 256
1042 * log blocks. Later on, we will only wait on blocks that correspond
1043 * to this transaction, but while we're unplugging we might as well
1044 * get a chunk of data on there.
1041 */ 1045 */
1042 atomic_inc(&journal->j_async_throttle); 1046 atomic_inc(&journal->j_async_throttle);
1043 for (i = 0; i < (jl->j_len + 1); i++) { 1047 write_len = jl->j_len + 1;
1048 if (write_len < 256)
1049 write_len = 256;
1050 for (i = 0 ; i < write_len ; i++) {
1044 bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + (jl->j_start + i) % 1051 bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + (jl->j_start + i) %
1045 SB_ONDISK_JOURNAL_SIZE(s); 1052 SB_ONDISK_JOURNAL_SIZE(s);
1046 tbh = journal_find_get_block(s, bn); 1053 tbh = journal_find_get_block(s, bn);
1047 if (buffer_dirty(tbh)) /* redundant, ll_rw_block() checks */ 1054 if (tbh) {
1048 ll_rw_block(SWRITE, 1, &tbh); 1055 if (buffer_dirty(tbh))
1049 put_bh(tbh); 1056 ll_rw_block(WRITE, 1, &tbh) ;
1057 put_bh(tbh) ;
1058 }
1050 } 1059 }
1051 atomic_dec(&journal->j_async_throttle); 1060 atomic_dec(&journal->j_async_throttle);
1052 1061
1053 /* We're skipping the commit if there's an error */ 1062 /* We're skipping the commit if there's an error */
1054 if (retval || reiserfs_is_journal_aborted(journal)) 1063 if (retval || reiserfs_is_journal_aborted(journal))
1055 barrier = 0; 1064 barrier = 0;
1056 1065
1057 /* wait on everything written so far before writing the commit 1066 /* wait on everything written so far before writing the commit
1058 * if we are in barrier mode, send the commit down now 1067 * if we are in barrier mode, send the commit down now
1059 */ 1068 */
1060 barrier = reiserfs_barrier_flush(s); 1069 barrier = reiserfs_barrier_flush(s);
1061 if (barrier) { 1070 if (barrier) {
1062 int ret; 1071 int ret;
1063 lock_buffer(jl->j_commit_bh); 1072 lock_buffer(jl->j_commit_bh);
1064 ret = submit_barrier_buffer(jl->j_commit_bh); 1073 ret = submit_barrier_buffer(jl->j_commit_bh);
1065 if (ret == -EOPNOTSUPP) { 1074 if (ret == -EOPNOTSUPP) {
1066 set_buffer_uptodate(jl->j_commit_bh); 1075 set_buffer_uptodate(jl->j_commit_bh);
1067 disable_barrier(s); 1076 disable_barrier(s);
1068 barrier = 0; 1077 barrier = 0;
1069 } 1078 }
1070 } 1079 }
1071 for (i = 0; i < (jl->j_len + 1); i++) { 1080 for (i = 0; i < (jl->j_len + 1); i++) {
1072 bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + 1081 bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) +
1073 (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s); 1082 (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s);
1074 tbh = journal_find_get_block(s, bn); 1083 tbh = journal_find_get_block(s, bn);
1075 wait_on_buffer(tbh); 1084 wait_on_buffer(tbh);
1076 // since we're using ll_rw_block above, it might have skipped over 1085 // since we're using ll_rw_block above, it might have skipped over
1077 // a locked buffer. Double check here 1086 // a locked buffer. Double check here
1078 // 1087 //
1079 if (buffer_dirty(tbh)) /* redundant, sync_dirty_buffer() checks */ 1088 if (buffer_dirty(tbh)) /* redundant, sync_dirty_buffer() checks */
1080 sync_dirty_buffer(tbh); 1089 sync_dirty_buffer(tbh);
1081 if (unlikely(!buffer_uptodate(tbh))) { 1090 if (unlikely(!buffer_uptodate(tbh))) {
1082 #ifdef CONFIG_REISERFS_CHECK 1091 #ifdef CONFIG_REISERFS_CHECK
1083 reiserfs_warning(s, "journal-601, buffer write failed"); 1092 reiserfs_warning(s, "journal-601, buffer write failed");
1084 #endif 1093 #endif
1085 retval = -EIO; 1094 retval = -EIO;
1086 } 1095 }
1087 put_bh(tbh); /* once for journal_find_get_block */ 1096 put_bh(tbh); /* once for journal_find_get_block */
1088 put_bh(tbh); /* once due to original getblk in do_journal_end */ 1097 put_bh(tbh); /* once due to original getblk in do_journal_end */
1089 atomic_dec(&(jl->j_commit_left)); 1098 atomic_dec(&(jl->j_commit_left));
1090 } 1099 }
1091 1100
1092 BUG_ON(atomic_read(&(jl->j_commit_left)) != 1); 1101 BUG_ON(atomic_read(&(jl->j_commit_left)) != 1);
1093 1102
1094 if (!barrier) { 1103 if (!barrier) {
1095 /* If there was a write error in the journal - we can't commit 1104 /* If there was a write error in the journal - we can't commit
1096 * this transaction - it will be invalid and, if successful, 1105 * this transaction - it will be invalid and, if successful,
1097 * will just end up propagating the write error out to 1106 * will just end up propagating the write error out to
1098 * the file system. */ 1107 * the file system. */
1099 if (likely(!retval && !reiserfs_is_journal_aborted (journal))) { 1108 if (likely(!retval && !reiserfs_is_journal_aborted (journal))) {
1100 if (buffer_dirty(jl->j_commit_bh)) 1109 if (buffer_dirty(jl->j_commit_bh))
1101 BUG(); 1110 BUG();
1102 mark_buffer_dirty(jl->j_commit_bh) ; 1111 mark_buffer_dirty(jl->j_commit_bh) ;
1103 sync_dirty_buffer(jl->j_commit_bh) ; 1112 sync_dirty_buffer(jl->j_commit_bh) ;
1104 } 1113 }
1105 } else 1114 } else
1106 wait_on_buffer(jl->j_commit_bh); 1115 wait_on_buffer(jl->j_commit_bh);
1107 1116
1108 check_barrier_completion(s, jl->j_commit_bh); 1117 check_barrier_completion(s, jl->j_commit_bh);
1109 1118
1110 /* If there was a write error in the journal - we can't commit this 1119 /* If there was a write error in the journal - we can't commit this
1111 * transaction - it will be invalid and, if successful, will just end 1120 * transaction - it will be invalid and, if successful, will just end
1112 * up propagating the write error out to the filesystem. */ 1121 * up propagating the write error out to the filesystem. */
1113 if (unlikely(!buffer_uptodate(jl->j_commit_bh))) { 1122 if (unlikely(!buffer_uptodate(jl->j_commit_bh))) {
1114 #ifdef CONFIG_REISERFS_CHECK 1123 #ifdef CONFIG_REISERFS_CHECK
1115 reiserfs_warning(s, "journal-615: buffer write failed"); 1124 reiserfs_warning(s, "journal-615: buffer write failed");
1116 #endif 1125 #endif
1117 retval = -EIO; 1126 retval = -EIO;
1118 } 1127 }
1119 bforget(jl->j_commit_bh); 1128 bforget(jl->j_commit_bh);
1120 if (journal->j_last_commit_id != 0 && 1129 if (journal->j_last_commit_id != 0 &&
1121 (jl->j_trans_id - journal->j_last_commit_id) != 1) { 1130 (jl->j_trans_id - journal->j_last_commit_id) != 1) {
1122 reiserfs_warning(s, "clm-2200: last commit %lu, current %lu", 1131 reiserfs_warning(s, "clm-2200: last commit %lu, current %lu",
1123 journal->j_last_commit_id, jl->j_trans_id); 1132 journal->j_last_commit_id, jl->j_trans_id);
1124 } 1133 }
1125 journal->j_last_commit_id = jl->j_trans_id; 1134 journal->j_last_commit_id = jl->j_trans_id;
1126 1135
1127 /* now, every commit block is on the disk. It is safe to allow blocks freed during this transaction to be reallocated */ 1136 /* now, every commit block is on the disk. It is safe to allow blocks freed during this transaction to be reallocated */
1128 cleanup_freed_for_journal_list(s, jl); 1137 cleanup_freed_for_journal_list(s, jl);
1129 1138
1130 retval = retval ? retval : journal->j_errno; 1139 retval = retval ? retval : journal->j_errno;
1131 1140
1132 /* mark the metadata dirty */ 1141 /* mark the metadata dirty */
1133 if (!retval) 1142 if (!retval)
1134 dirty_one_transaction(s, jl); 1143 dirty_one_transaction(s, jl);
1135 atomic_dec(&(jl->j_commit_left)); 1144 atomic_dec(&(jl->j_commit_left));
1136 1145
1137 if (flushall) { 1146 if (flushall) {
1138 atomic_set(&(jl->j_older_commits_done), 1); 1147 atomic_set(&(jl->j_older_commits_done), 1);
1139 } 1148 }
1140 up(&jl->j_commit_lock); 1149 up(&jl->j_commit_lock);
1141 put_jl: 1150 put_jl:
1142 put_journal_list(s, jl); 1151 put_journal_list(s, jl);
1143 1152
1144 if (retval) 1153 if (retval)
1145 reiserfs_abort(s, retval, "Journal write error in %s", 1154 reiserfs_abort(s, retval, "Journal write error in %s",
1146 __FUNCTION__); 1155 __FUNCTION__);
1147 put_fs_excl(); 1156 put_fs_excl();
1148 return retval; 1157 return retval;
1149 } 1158 }
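
The write_len change above is the commit-batching half of this patch: submission is widened to at least 256 log blocks (possibly reaching blocks beyond this transaction, which is why journal_find_get_block() may now return NULL and is checked), while the wait loop that follows still covers only this transaction's j_len + 1 blocks. A model of the widen-submit / narrow-wait split; submit_block() and wait_block() are hypothetical stand-ins for the find/ll_rw_block and wait_on_buffer pairs:

    #define MIN_BATCH 256

    extern void submit_block(unsigned long bn);  /* hypothetical */
    extern void wait_block(unsigned long bn);    /* hypothetical */

    static void commit_range(unsigned long start, unsigned long len,
                             unsigned long journal_size)
    {
        unsigned long write_len = len + 1;  /* description + log blocks */
        unsigned long i;

        if (write_len < MIN_BATCH)
            write_len = MIN_BATCH;
        /* wide submit: may touch blocks past this transaction */
        for (i = 0; i < write_len; i++)
            submit_block((start + i) % journal_size);
        /* narrow wait: only the blocks this commit depends on */
        for (i = 0; i < len + 1; i++)
            wait_block((start + i) % journal_size);
    }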
1150 1159
1151 /* 1160 /*
1152 ** flush_journal_list frequently needs to find a newer transaction for a given block. This does that, or 1161 ** flush_journal_list frequently needs to find a newer transaction for a given block. This does that, or
1153 ** returns NULL if it can't find anything 1162 ** returns NULL if it can't find anything
1154 */ 1163 */
1155 static struct reiserfs_journal_list *find_newer_jl_for_cn(struct 1164 static struct reiserfs_journal_list *find_newer_jl_for_cn(struct
1156 reiserfs_journal_cnode 1165 reiserfs_journal_cnode
1157 *cn) 1166 *cn)
1158 { 1167 {
1159 struct super_block *sb = cn->sb; 1168 struct super_block *sb = cn->sb;
1160 b_blocknr_t blocknr = cn->blocknr; 1169 b_blocknr_t blocknr = cn->blocknr;
1161 1170
1162 cn = cn->hprev; 1171 cn = cn->hprev;
1163 while (cn) { 1172 while (cn) {
1164 if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist) { 1173 if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist) {
1165 return cn->jlist; 1174 return cn->jlist;
1166 } 1175 }
1167 cn = cn->hprev; 1176 cn = cn->hprev;
1168 } 1177 }
1169 return NULL; 1178 return NULL;
1170 } 1179 }
1171 1180
1172 static void remove_journal_hash(struct super_block *, 1181 static void remove_journal_hash(struct super_block *,
1173 struct reiserfs_journal_cnode **, 1182 struct reiserfs_journal_cnode **,
1174 struct reiserfs_journal_list *, unsigned long, 1183 struct reiserfs_journal_list *, unsigned long,
1175 int); 1184 int);
1176 1185
1177 /* 1186 /*
1178 ** once all the real blocks have been flushed, it is safe to remove them from the 1187 ** once all the real blocks have been flushed, it is safe to remove them from the
1179 ** journal list for this transaction. Aside from freeing the cnode, this also allows the 1188 ** journal list for this transaction. Aside from freeing the cnode, this also allows the
1180 ** block to be reallocated for data blocks if it had been deleted. 1189 ** block to be reallocated for data blocks if it had been deleted.
1181 */ 1190 */
1182 static void remove_all_from_journal_list(struct super_block *p_s_sb, 1191 static void remove_all_from_journal_list(struct super_block *p_s_sb,
1183 struct reiserfs_journal_list *jl, 1192 struct reiserfs_journal_list *jl,
1184 int debug) 1193 int debug)
1185 { 1194 {
1186 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 1195 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
1187 struct reiserfs_journal_cnode *cn, *last; 1196 struct reiserfs_journal_cnode *cn, *last;
1188 cn = jl->j_realblock; 1197 cn = jl->j_realblock;
1189 1198
1190 /* which is better, to lock once around the whole loop, or 1199 /* which is better, to lock once around the whole loop, or
1191 ** to lock for each call to remove_journal_hash? 1200 ** to lock for each call to remove_journal_hash?
1192 */ 1201 */
1193 while (cn) { 1202 while (cn) {
1194 if (cn->blocknr != 0) { 1203 if (cn->blocknr != 0) {
1195 if (debug) { 1204 if (debug) {
1196 reiserfs_warning(p_s_sb, 1205 reiserfs_warning(p_s_sb,
1197 "block %u, bh is %d, state %ld", 1206 "block %u, bh is %d, state %ld",
1198 cn->blocknr, cn->bh ? 1 : 0, 1207 cn->blocknr, cn->bh ? 1 : 0,
1199 cn->state); 1208 cn->state);
1200 } 1209 }
1201 cn->state = 0; 1210 cn->state = 0;
1202 remove_journal_hash(p_s_sb, journal->j_list_hash_table, 1211 remove_journal_hash(p_s_sb, journal->j_list_hash_table,
1203 jl, cn->blocknr, 1); 1212 jl, cn->blocknr, 1);
1204 } 1213 }
1205 last = cn; 1214 last = cn;
1206 cn = cn->next; 1215 cn = cn->next;
1207 free_cnode(p_s_sb, last); 1216 free_cnode(p_s_sb, last);
1208 } 1217 }
1209 jl->j_realblock = NULL; 1218 jl->j_realblock = NULL;
1210 } 1219 }
1211 1220
1212 /* 1221 /*
1213 ** if this timestamp is greater than the timestamp we wrote last to the header block, write it to the header block. 1222 ** if this timestamp is greater than the timestamp we wrote last to the header block, write it to the header block.
1214 ** once this is done, I can safely say the log area for this transaction won't ever be replayed, and I can start 1223 ** once this is done, I can safely say the log area for this transaction won't ever be replayed, and I can start
1215 ** releasing blocks in this transaction for reuse as data blocks. 1224 ** releasing blocks in this transaction for reuse as data blocks.
1216 ** called by flush_journal_list, before it calls remove_all_from_journal_list 1225 ** called by flush_journal_list, before it calls remove_all_from_journal_list
1217 ** 1226 **
1218 */ 1227 */
1219 static int _update_journal_header_block(struct super_block *p_s_sb, 1228 static int _update_journal_header_block(struct super_block *p_s_sb,
1220 unsigned long offset, 1229 unsigned long offset,
1221 unsigned long trans_id) 1230 unsigned long trans_id)
1222 { 1231 {
1223 struct reiserfs_journal_header *jh; 1232 struct reiserfs_journal_header *jh;
1224 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 1233 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
1225 1234
1226 if (reiserfs_is_journal_aborted(journal)) 1235 if (reiserfs_is_journal_aborted(journal))
1227 return -EIO; 1236 return -EIO;
1228 1237
1229 if (trans_id >= journal->j_last_flush_trans_id) { 1238 if (trans_id >= journal->j_last_flush_trans_id) {
1230 if (buffer_locked((journal->j_header_bh))) { 1239 if (buffer_locked((journal->j_header_bh))) {
1231 wait_on_buffer((journal->j_header_bh)); 1240 wait_on_buffer((journal->j_header_bh));
1232 if (unlikely(!buffer_uptodate(journal->j_header_bh))) { 1241 if (unlikely(!buffer_uptodate(journal->j_header_bh))) {
1233 #ifdef CONFIG_REISERFS_CHECK 1242 #ifdef CONFIG_REISERFS_CHECK
1234 reiserfs_warning(p_s_sb, 1243 reiserfs_warning(p_s_sb,
1235 "journal-699: buffer write failed"); 1244 "journal-699: buffer write failed");
1236 #endif 1245 #endif
1237 return -EIO; 1246 return -EIO;
1238 } 1247 }
1239 } 1248 }
1240 journal->j_last_flush_trans_id = trans_id; 1249 journal->j_last_flush_trans_id = trans_id;
1241 journal->j_first_unflushed_offset = offset; 1250 journal->j_first_unflushed_offset = offset;
1242 jh = (struct reiserfs_journal_header *)(journal->j_header_bh-> 1251 jh = (struct reiserfs_journal_header *)(journal->j_header_bh->
1243 b_data); 1252 b_data);
1244 jh->j_last_flush_trans_id = cpu_to_le32(trans_id); 1253 jh->j_last_flush_trans_id = cpu_to_le32(trans_id);
1245 jh->j_first_unflushed_offset = cpu_to_le32(offset); 1254 jh->j_first_unflushed_offset = cpu_to_le32(offset);
1246 jh->j_mount_id = cpu_to_le32(journal->j_mount_id); 1255 jh->j_mount_id = cpu_to_le32(journal->j_mount_id);
1247 1256
1248 if (reiserfs_barrier_flush(p_s_sb)) { 1257 if (reiserfs_barrier_flush(p_s_sb)) {
1249 int ret; 1258 int ret;
1250 lock_buffer(journal->j_header_bh); 1259 lock_buffer(journal->j_header_bh);
1251 ret = submit_barrier_buffer(journal->j_header_bh); 1260 ret = submit_barrier_buffer(journal->j_header_bh);
1252 if (ret == -EOPNOTSUPP) { 1261 if (ret == -EOPNOTSUPP) {
1253 set_buffer_uptodate(journal->j_header_bh); 1262 set_buffer_uptodate(journal->j_header_bh);
1254 disable_barrier(p_s_sb); 1263 disable_barrier(p_s_sb);
1255 goto sync; 1264 goto sync;
1256 } 1265 }
1257 wait_on_buffer(journal->j_header_bh); 1266 wait_on_buffer(journal->j_header_bh);
1258 check_barrier_completion(p_s_sb, journal->j_header_bh); 1267 check_barrier_completion(p_s_sb, journal->j_header_bh);
1259 } else { 1268 } else {
1260 sync: 1269 sync:
1261 set_buffer_dirty(journal->j_header_bh); 1270 set_buffer_dirty(journal->j_header_bh);
1262 sync_dirty_buffer(journal->j_header_bh); 1271 sync_dirty_buffer(journal->j_header_bh);
1263 } 1272 }
1264 if (!buffer_uptodate(journal->j_header_bh)) { 1273 if (!buffer_uptodate(journal->j_header_bh)) {
1265 reiserfs_warning(p_s_sb, 1274 reiserfs_warning(p_s_sb,
1266 "journal-837: IO error during journal replay"); 1275 "journal-837: IO error during journal replay");
1267 return -EIO; 1276 return -EIO;
1268 } 1277 }
1269 } 1278 }
1270 return 0; 1279 return 0;
1271 } 1280 }
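
The header fields are converted with cpu_to_le32() before hitting disk, so the journal header is little-endian regardless of host byte order. A userspace equivalent using <endian.h>; the struct layout here is illustrative, not the real on-disk format:

    #include <endian.h>
    #include <stdint.h>

    struct jheader {  /* illustrative layout only */
        uint32_t last_flush_trans_id;
        uint32_t first_unflushed_offset;
        uint32_t mount_id;
    };

    static void fill_header(struct jheader *h, uint32_t trans_id,
                            uint32_t offset, uint32_t mount_id)
    {
        /* htole32() plays the role of cpu_to_le32() in the kernel. */
        h->last_flush_trans_id    = htole32(trans_id);
        h->first_unflushed_offset = htole32(offset);
        h->mount_id               = htole32(mount_id);
    }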
1272 1281
1273 static int update_journal_header_block(struct super_block *p_s_sb, 1282 static int update_journal_header_block(struct super_block *p_s_sb,
1274 unsigned long offset, 1283 unsigned long offset,
1275 unsigned long trans_id) 1284 unsigned long trans_id)
1276 { 1285 {
1277 return _update_journal_header_block(p_s_sb, offset, trans_id); 1286 return _update_journal_header_block(p_s_sb, offset, trans_id);
1278 } 1287 }
1279 1288
1280 /* 1289 /*
1281 ** flush any and all journal lists older than you are 1290 ** flush any and all journal lists older than you are
1282 ** can only be called from flush_journal_list 1291 ** can only be called from flush_journal_list
1283 */ 1292 */
1284 static int flush_older_journal_lists(struct super_block *p_s_sb, 1293 static int flush_older_journal_lists(struct super_block *p_s_sb,
1285 struct reiserfs_journal_list *jl) 1294 struct reiserfs_journal_list *jl)
1286 { 1295 {
1287 struct list_head *entry; 1296 struct list_head *entry;
1288 struct reiserfs_journal_list *other_jl; 1297 struct reiserfs_journal_list *other_jl;
1289 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 1298 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
1290 unsigned long trans_id = jl->j_trans_id; 1299 unsigned long trans_id = jl->j_trans_id;
1291 1300
1292 /* we know we are the only ones flushing things, no extra race 1301 /* we know we are the only ones flushing things, no extra race
1293 * protection is required. 1302 * protection is required.
1294 */ 1303 */
1295 restart: 1304 restart:
1296 entry = journal->j_journal_list.next; 1305 entry = journal->j_journal_list.next;
1297 /* Did we wrap? */ 1306 /* Did we wrap? */
1298 if (entry == &journal->j_journal_list) 1307 if (entry == &journal->j_journal_list)
1299 return 0; 1308 return 0;
1300 other_jl = JOURNAL_LIST_ENTRY(entry); 1309 other_jl = JOURNAL_LIST_ENTRY(entry);
1301 if (other_jl->j_trans_id < trans_id) { 1310 if (other_jl->j_trans_id < trans_id) {
1302 BUG_ON(other_jl->j_refcount <= 0); 1311 BUG_ON(other_jl->j_refcount <= 0);
1303 /* do not flush all */ 1312 /* do not flush all */
1304 flush_journal_list(p_s_sb, other_jl, 0); 1313 flush_journal_list(p_s_sb, other_jl, 0);
1305 1314
1306 /* other_jl is now deleted from the list */ 1315 /* other_jl is now deleted from the list */
1307 goto restart; 1316 goto restart;
1308 } 1317 }
1309 return 0; 1318 return 0;
1310 } 1319 }
1311 1320
1312 static void del_from_work_list(struct super_block *s, 1321 static void del_from_work_list(struct super_block *s,
1313 struct reiserfs_journal_list *jl) 1322 struct reiserfs_journal_list *jl)
1314 { 1323 {
1315 struct reiserfs_journal *journal = SB_JOURNAL(s); 1324 struct reiserfs_journal *journal = SB_JOURNAL(s);
1316 if (!list_empty(&jl->j_working_list)) { 1325 if (!list_empty(&jl->j_working_list)) {
1317 list_del_init(&jl->j_working_list); 1326 list_del_init(&jl->j_working_list);
1318 journal->j_num_work_lists--; 1327 journal->j_num_work_lists--;
1319 } 1328 }
1320 } 1329 }
1321 1330
1322 /* flush a journal list, both commit and real blocks 1331 /* flush a journal list, both commit and real blocks
1323 ** 1332 **
1324 ** always set flushall to 1, unless you are calling from inside 1333 ** always set flushall to 1, unless you are calling from inside
1325 ** flush_journal_list 1334 ** flush_journal_list
1326 ** 1335 **
1327 ** IMPORTANT. This can only be called while there are no journal writers, 1336 ** IMPORTANT. This can only be called while there are no journal writers,
1328 ** and the journal is locked. That means it can only be called from 1337 ** and the journal is locked. That means it can only be called from
1329 ** do_journal_end, or by journal_release 1338 ** do_journal_end, or by journal_release
1330 */ 1339 */
1331 static int flush_journal_list(struct super_block *s, 1340 static int flush_journal_list(struct super_block *s,
1332 struct reiserfs_journal_list *jl, int flushall) 1341 struct reiserfs_journal_list *jl, int flushall)
1333 { 1342 {
1334 struct reiserfs_journal_list *pjl; 1343 struct reiserfs_journal_list *pjl;
1335 struct reiserfs_journal_cnode *cn, *last; 1344 struct reiserfs_journal_cnode *cn, *last;
1336 int count; 1345 int count;
1337 int was_jwait = 0; 1346 int was_jwait = 0;
1338 int was_dirty = 0; 1347 int was_dirty = 0;
1339 struct buffer_head *saved_bh; 1348 struct buffer_head *saved_bh;
1340 unsigned long j_len_saved = jl->j_len; 1349 unsigned long j_len_saved = jl->j_len;
1341 struct reiserfs_journal *journal = SB_JOURNAL(s); 1350 struct reiserfs_journal *journal = SB_JOURNAL(s);
1342 int err = 0; 1351 int err = 0;
1343 1352
1344 BUG_ON(j_len_saved <= 0); 1353 BUG_ON(j_len_saved <= 0);
1345 1354
1346 if (atomic_read(&journal->j_wcount) != 0) { 1355 if (atomic_read(&journal->j_wcount) != 0) {
1347 reiserfs_warning(s, 1356 reiserfs_warning(s,
1348 "clm-2048: flush_journal_list called with wcount %d", 1357 "clm-2048: flush_journal_list called with wcount %d",
1349 atomic_read(&journal->j_wcount)); 1358 atomic_read(&journal->j_wcount));
1350 } 1359 }
1351 BUG_ON(jl->j_trans_id == 0); 1360 BUG_ON(jl->j_trans_id == 0);
1352 1361
1353 /* if flushall == 0, the lock is already held */ 1362 /* if flushall == 0, the lock is already held */
1354 if (flushall) { 1363 if (flushall) {
1355 down(&journal->j_flush_sem); 1364 down(&journal->j_flush_sem);
1356 } else if (!down_trylock(&journal->j_flush_sem)) { 1365 } else if (!down_trylock(&journal->j_flush_sem)) {
1357 BUG(); 1366 BUG();
1358 } 1367 }
1359 1368
1360 count = 0; 1369 count = 0;
1361 if (j_len_saved > journal->j_trans_max) { 1370 if (j_len_saved > journal->j_trans_max) {
1362 reiserfs_panic(s, 1371 reiserfs_panic(s,
1363 "journal-715: flush_journal_list, length is %lu, trans id %lu\n", 1372 "journal-715: flush_journal_list, length is %lu, trans id %lu\n",
1364 j_len_saved, jl->j_trans_id); 1373 j_len_saved, jl->j_trans_id);
1365 return 0; 1374 return 0;
1366 } 1375 }
1367 1376
1368 get_fs_excl(); 1377 get_fs_excl();
1369 1378
1370 /* if all the work is already done, get out of here */ 1379 /* if all the work is already done, get out of here */
1371 if (atomic_read(&(jl->j_nonzerolen)) <= 0 && 1380 if (atomic_read(&(jl->j_nonzerolen)) <= 0 &&
1372 atomic_read(&(jl->j_commit_left)) <= 0) { 1381 atomic_read(&(jl->j_commit_left)) <= 0) {
1373 goto flush_older_and_return; 1382 goto flush_older_and_return;
1374 } 1383 }
1375 1384
1376 /* start by putting the commit list on disk. This will also flush 1385 /* start by putting the commit list on disk. This will also flush
1377 ** the commit lists of any older transactions 1386 ** the commit lists of any older transactions
1378 */ 1387 */
1379 flush_commit_list(s, jl, 1); 1388 flush_commit_list(s, jl, 1);
1380 1389
1381 if (!(jl->j_state & LIST_DIRTY) 1390 if (!(jl->j_state & LIST_DIRTY)
1382 && !reiserfs_is_journal_aborted(journal)) 1391 && !reiserfs_is_journal_aborted(journal))
1383 BUG(); 1392 BUG();
1384 1393
1385 /* are we done now? */ 1394 /* are we done now? */
1386 if (atomic_read(&(jl->j_nonzerolen)) <= 0 && 1395 if (atomic_read(&(jl->j_nonzerolen)) <= 0 &&
1387 atomic_read(&(jl->j_commit_left)) <= 0) { 1396 atomic_read(&(jl->j_commit_left)) <= 0) {
1388 goto flush_older_and_return; 1397 goto flush_older_and_return;
1389 } 1398 }
1390 1399
1391 /* loop through each cnode, see if we need to write it, 1400 /* loop through each cnode, see if we need to write it,
1392 ** or wait on a more recent transaction, or just ignore it 1401 ** or wait on a more recent transaction, or just ignore it
1393 */ 1402 */
1394 if (atomic_read(&(journal->j_wcount)) != 0) { 1403 if (atomic_read(&(journal->j_wcount)) != 0) {
1395 reiserfs_panic(s, 1404 reiserfs_panic(s,
1396 "journal-844: panic journal list is flushing, wcount is not 0\n"); 1405 "journal-844: panic journal list is flushing, wcount is not 0\n");
1397 } 1406 }
1398 cn = jl->j_realblock; 1407 cn = jl->j_realblock;
1399 while (cn) { 1408 while (cn) {
1400 was_jwait = 0; 1409 was_jwait = 0;
1401 was_dirty = 0; 1410 was_dirty = 0;
1402 saved_bh = NULL; 1411 saved_bh = NULL;
1403 /* blocknr of 0 is no longer in the hash, ignore it */ 1412 /* blocknr of 0 is no longer in the hash, ignore it */
1404 if (cn->blocknr == 0) { 1413 if (cn->blocknr == 0) {
1405 goto free_cnode; 1414 goto free_cnode;
1406 } 1415 }
1407 1416
1408 /* This transaction failed commit. Don't write out to the disk */ 1417 /* This transaction failed commit. Don't write out to the disk */
1409 if (!(jl->j_state & LIST_DIRTY)) 1418 if (!(jl->j_state & LIST_DIRTY))
1410 goto free_cnode; 1419 goto free_cnode;
1411 1420
1412 pjl = find_newer_jl_for_cn(cn); 1421 pjl = find_newer_jl_for_cn(cn);
1413 /* the order is important here. We check pjl to make sure we 1422 /* the order is important here. We check pjl to make sure we
1414 ** don't clear BH_JDirty_wait if we aren't the one writing this 1423 ** don't clear BH_JDirty_wait if we aren't the one writing this
1415 ** block to disk 1424 ** block to disk
1416 */ 1425 */
1417 if (!pjl && cn->bh) { 1426 if (!pjl && cn->bh) {
1418 saved_bh = cn->bh; 1427 saved_bh = cn->bh;
1419 1428
1420 /* we do this to make sure nobody releases the buffer while 1429 /* we do this to make sure nobody releases the buffer while
1421 ** we are working with it 1430 ** we are working with it
1422 */ 1431 */
1423 get_bh(saved_bh); 1432 get_bh(saved_bh);
1424 1433
1425 if (buffer_journal_dirty(saved_bh)) { 1434 if (buffer_journal_dirty(saved_bh)) {
1426 BUG_ON(!can_dirty(cn)); 1435 BUG_ON(!can_dirty(cn));
1427 was_jwait = 1; 1436 was_jwait = 1;
1428 was_dirty = 1; 1437 was_dirty = 1;
1429 } else if (can_dirty(cn)) { 1438 } else if (can_dirty(cn)) {
1430 /* everything with !pjl && jwait should be writable */ 1439 /* everything with !pjl && jwait should be writable */
1431 BUG(); 1440 BUG();
1432 } 1441 }
1433 } 1442 }
1434 1443
1435 /* if someone has this block in a newer transaction, just make 1444 /* if someone has this block in a newer transaction, just make
1436 ** sure they are committed, and don't try writing it to disk 1445 ** sure they are committed, and don't try writing it to disk
1437 */ 1446 */
1438 if (pjl) { 1447 if (pjl) {
1439 if (atomic_read(&pjl->j_commit_left)) 1448 if (atomic_read(&pjl->j_commit_left))
1440 flush_commit_list(s, pjl, 1); 1449 flush_commit_list(s, pjl, 1);
1441 goto free_cnode; 1450 goto free_cnode;
1442 } 1451 }
1443 1452
1444 /* bh == NULL when the block got to disk on its own, OR, 1453 /* bh == NULL when the block got to disk on its own, OR,
1445 ** the block got freed in a future transaction 1454 ** the block got freed in a future transaction
1446 */ 1455 */
1447 if (saved_bh == NULL) { 1456 if (saved_bh == NULL) {
1448 goto free_cnode; 1457 goto free_cnode;
1449 } 1458 }
1450 1459
1451 /* this should never happen. kupdate_one_transaction has this list 1460 /* this should never happen. kupdate_one_transaction has this list
1452 ** locked while it works, so we should never see a buffer here that 1461 ** locked while it works, so we should never see a buffer here that
1453 ** is not marked JDirty_wait 1462 ** is not marked JDirty_wait
1454 */ 1463 */
1455 if ((!was_jwait) && !buffer_locked(saved_bh)) { 1464 if ((!was_jwait) && !buffer_locked(saved_bh)) {
1456 reiserfs_warning(s, 1465 reiserfs_warning(s,
1457 "journal-813: BAD! buffer %llu %cdirty %cjwait, " 1466 "journal-813: BAD! buffer %llu %cdirty %cjwait, "
1458 "not in a newer tranasction", 1467 "not in a newer tranasction",
1459 (unsigned long long)saved_bh-> 1468 (unsigned long long)saved_bh->
1460 b_blocknr, was_dirty ? ' ' : '!', 1469 b_blocknr, was_dirty ? ' ' : '!',
1461 was_jwait ? ' ' : '!'); 1470 was_jwait ? ' ' : '!');
1462 } 1471 }
1463 if (was_dirty) { 1472 if (was_dirty) {
1464 /* we inc again because saved_bh gets decremented at free_cnode */ 1473 /* we inc again because saved_bh gets decremented at free_cnode */
1465 get_bh(saved_bh); 1474 get_bh(saved_bh);
1466 set_bit(BLOCK_NEEDS_FLUSH, &cn->state); 1475 set_bit(BLOCK_NEEDS_FLUSH, &cn->state);
1467 lock_buffer(saved_bh); 1476 lock_buffer(saved_bh);
1468 BUG_ON(cn->blocknr != saved_bh->b_blocknr); 1477 BUG_ON(cn->blocknr != saved_bh->b_blocknr);
1469 if (buffer_dirty(saved_bh)) 1478 if (buffer_dirty(saved_bh))
1470 submit_logged_buffer(saved_bh); 1479 submit_logged_buffer(saved_bh);
1471 else 1480 else
1472 unlock_buffer(saved_bh); 1481 unlock_buffer(saved_bh);
1473 count++; 1482 count++;
1474 } else { 1483 } else {
1475 reiserfs_warning(s, 1484 reiserfs_warning(s,
1476 "clm-2082: Unable to flush buffer %llu in %s", 1485 "clm-2082: Unable to flush buffer %llu in %s",
1477 (unsigned long long)saved_bh-> 1486 (unsigned long long)saved_bh->
1478 b_blocknr, __FUNCTION__); 1487 b_blocknr, __FUNCTION__);
1479 } 1488 }
1480 free_cnode: 1489 free_cnode:
1481 last = cn; 1490 last = cn;
1482 cn = cn->next; 1491 cn = cn->next;
1483 if (saved_bh) { 1492 if (saved_bh) {
1484 /* we incremented this to keep others from taking the buffer head away */ 1493 /* we incremented this to keep others from taking the buffer head away */
1485 put_bh(saved_bh); 1494 put_bh(saved_bh);
1486 if (atomic_read(&(saved_bh->b_count)) < 0) { 1495 if (atomic_read(&(saved_bh->b_count)) < 0) {
1487 reiserfs_warning(s, 1496 reiserfs_warning(s,
1488 "journal-945: saved_bh->b_count < 0"); 1497 "journal-945: saved_bh->b_count < 0");
1489 } 1498 }
1490 } 1499 }
1491 } 1500 }
1492 if (count > 0) { 1501 if (count > 0) {
1493 cn = jl->j_realblock; 1502 cn = jl->j_realblock;
1494 while (cn) { 1503 while (cn) {
1495 if (test_bit(BLOCK_NEEDS_FLUSH, &cn->state)) { 1504 if (test_bit(BLOCK_NEEDS_FLUSH, &cn->state)) {
1496 if (!cn->bh) { 1505 if (!cn->bh) {
1497 reiserfs_panic(s, 1506 reiserfs_panic(s,
1498 "journal-1011: cn->bh is NULL\n"); 1507 "journal-1011: cn->bh is NULL\n");
1499 } 1508 }
1500 wait_on_buffer(cn->bh); 1509 wait_on_buffer(cn->bh);
1501 if (!cn->bh) { 1510 if (!cn->bh) {
1502 reiserfs_panic(s, 1511 reiserfs_panic(s,
1503 "journal-1012: cn->bh is NULL\n"); 1512 "journal-1012: cn->bh is NULL\n");
1504 } 1513 }
1505 if (unlikely(!buffer_uptodate(cn->bh))) { 1514 if (unlikely(!buffer_uptodate(cn->bh))) {
1506 #ifdef CONFIG_REISERFS_CHECK 1515 #ifdef CONFIG_REISERFS_CHECK
1507 reiserfs_warning(s, 1516 reiserfs_warning(s,
1508 "journal-949: buffer write failed\n"); 1517 "journal-949: buffer write failed\n");
1509 #endif 1518 #endif
1510 err = -EIO; 1519 err = -EIO;
1511 } 1520 }
1512 /* note, we must clear the JDirty_wait bit after the up to date 1521 /* note, we must clear the JDirty_wait bit after the up to date
1513 ** check, otherwise we race against our flushpage routine 1522 ** check, otherwise we race against our flushpage routine
1514 */ 1523 */
1515 BUG_ON(!test_clear_buffer_journal_dirty 1524 BUG_ON(!test_clear_buffer_journal_dirty
1516 (cn->bh)); 1525 (cn->bh));
1517 1526
1518 /* undo the inc from journal_mark_dirty */ 1527 /* undo the inc from journal_mark_dirty */
1519 put_bh(cn->bh); 1528 put_bh(cn->bh);
1520 brelse(cn->bh); 1529 brelse(cn->bh);
1521 } 1530 }
1522 cn = cn->next; 1531 cn = cn->next;
1523 } 1532 }
1524 } 1533 }
1525 1534
1526 if (err) 1535 if (err)
1527 reiserfs_abort(s, -EIO, 1536 reiserfs_abort(s, -EIO,
1528 "Write error while pushing transaction to disk in %s", 1537 "Write error while pushing transaction to disk in %s",
1529 __FUNCTION__); 1538 __FUNCTION__);
1530 flush_older_and_return: 1539 flush_older_and_return:
1531 1540
1532 /* before we can update the journal header block, we _must_ flush all 1541 /* before we can update the journal header block, we _must_ flush all
1533 ** real blocks from all older transactions to disk. This is because 1542 ** real blocks from all older transactions to disk. This is because
1534 ** once the header block is updated, this transaction will not be 1543 ** once the header block is updated, this transaction will not be
1535 ** replayed after a crash 1544 ** replayed after a crash
1536 */ 1545 */
1537 if (flushall) { 1546 if (flushall) {
1538 flush_older_journal_lists(s, jl); 1547 flush_older_journal_lists(s, jl);
1539 } 1548 }
1540 1549
1541 err = journal->j_errno; 1550 err = journal->j_errno;
1542 /* before we can remove everything from the hash tables for this 1551 /* before we can remove everything from the hash tables for this
1543 ** transaction, we must make sure it can never be replayed 1552 ** transaction, we must make sure it can never be replayed
1544 ** 1553 **
1545 ** since we are only called from do_journal_end, we know for sure there 1554 ** since we are only called from do_journal_end, we know for sure there
1546 ** are no allocations going on while we are flushing journal lists. So, 1555 ** are no allocations going on while we are flushing journal lists. So,
1547 ** we only need to update the journal header block for the last list 1556 ** we only need to update the journal header block for the last list
1548 ** being flushed 1557 ** being flushed
1549 */ 1558 */
1550 if (!err && flushall) { 1559 if (!err && flushall) {
1551 err = 1560 err =
1552 update_journal_header_block(s, 1561 update_journal_header_block(s,
1553 (jl->j_start + jl->j_len + 1562 (jl->j_start + jl->j_len +
1554 2) % SB_ONDISK_JOURNAL_SIZE(s), 1563 2) % SB_ONDISK_JOURNAL_SIZE(s),
1555 jl->j_trans_id); 1564 jl->j_trans_id);
1556 if (err) 1565 if (err)
1557 reiserfs_abort(s, -EIO, 1566 reiserfs_abort(s, -EIO,
1558 "Write error while updating journal header in %s", 1567 "Write error while updating journal header in %s",
1559 __FUNCTION__); 1568 __FUNCTION__);
1560 } 1569 }
1561 remove_all_from_journal_list(s, jl, 0); 1570 remove_all_from_journal_list(s, jl, 0);
1562 list_del_init(&jl->j_list); 1571 list_del_init(&jl->j_list);
1563 journal->j_num_lists--; 1572 journal->j_num_lists--;
1564 del_from_work_list(s, jl); 1573 del_from_work_list(s, jl);
1565 1574
1566 if (journal->j_last_flush_id != 0 && 1575 if (journal->j_last_flush_id != 0 &&
1567 (jl->j_trans_id - journal->j_last_flush_id) != 1) { 1576 (jl->j_trans_id - journal->j_last_flush_id) != 1) {
1568 reiserfs_warning(s, "clm-2201: last flush %lu, current %lu", 1577 reiserfs_warning(s, "clm-2201: last flush %lu, current %lu",
1569 journal->j_last_flush_id, jl->j_trans_id); 1578 journal->j_last_flush_id, jl->j_trans_id);
1570 } 1579 }
1571 journal->j_last_flush_id = jl->j_trans_id; 1580 journal->j_last_flush_id = jl->j_trans_id;
1572 1581
1573 /* not strictly required since we are freeing the list, but it should 1582 /* not strictly required since we are freeing the list, but it should
1574 * help find code using dead lists later on 1583 * help find code using dead lists later on
1575 */ 1584 */
1576 jl->j_len = 0; 1585 jl->j_len = 0;
1577 atomic_set(&(jl->j_nonzerolen), 0); 1586 atomic_set(&(jl->j_nonzerolen), 0);
1578 jl->j_start = 0; 1587 jl->j_start = 0;
1579 jl->j_realblock = NULL; 1588 jl->j_realblock = NULL;
1580 jl->j_commit_bh = NULL; 1589 jl->j_commit_bh = NULL;
1581 jl->j_trans_id = 0; 1590 jl->j_trans_id = 0;
1582 jl->j_state = 0; 1591 jl->j_state = 0;
1583 put_journal_list(s, jl); 1592 put_journal_list(s, jl);
1584 if (flushall) 1593 if (flushall)
1585 up(&journal->j_flush_sem); 1594 up(&journal->j_flush_sem);
1586 put_fs_excl(); 1595 put_fs_excl();
1587 return err; 1596 return err;
1588 } 1597 }
1589 1598
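A minimal user-space sketch of the header-offset arithmetic above: the journal's first-unflushed pointer is advanced past one description block, j_len data blocks, and one commit block, modulo the on-disk journal size so the log wraps. The names here are illustrative stand-ins for SB_ONDISK_JOURNAL_SIZE(s), jl->j_start and jl->j_len, not the kernel's API.

    #include <stdio.h>

    static unsigned long next_header_offset(unsigned long start,
                                            unsigned long len,
                                            unsigned long journal_size)
    {
            /* +2 covers the description block and the commit block */
            return (start + len + 2) % journal_size;
    }

    int main(void)
    {
            /* a 10-block transaction near the end of an 8192-block log
             * wraps the first-unflushed pointer back to offset 5 */
            printf("%lu\n", next_header_offset(8185, 10, 8192));
            return 0;
    }
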
1590 static int write_one_transaction(struct super_block *s, 1599 static int write_one_transaction(struct super_block *s,
1591 struct reiserfs_journal_list *jl, 1600 struct reiserfs_journal_list *jl,
1592 struct buffer_chunk *chunk) 1601 struct buffer_chunk *chunk)
1593 { 1602 {
1594 struct reiserfs_journal_cnode *cn; 1603 struct reiserfs_journal_cnode *cn;
1595 int ret = 0; 1604 int ret = 0;
1596 1605
1597 jl->j_state |= LIST_TOUCHED; 1606 jl->j_state |= LIST_TOUCHED;
1598 del_from_work_list(s, jl); 1607 del_from_work_list(s, jl);
1599 if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0) { 1608 if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0) {
1600 return 0; 1609 return 0;
1601 } 1610 }
1602 1611
1603 cn = jl->j_realblock; 1612 cn = jl->j_realblock;
1604 while (cn) { 1613 while (cn) {
1605 /* if the blocknr == 0, this has been cleared from the hash, 1614 /* if the blocknr == 0, this has been cleared from the hash,
1606 ** skip it 1615 ** skip it
1607 */ 1616 */
1608 if (cn->blocknr == 0) { 1617 if (cn->blocknr == 0) {
1609 goto next; 1618 goto next;
1610 } 1619 }
1611 if (cn->bh && can_dirty(cn) && buffer_dirty(cn->bh)) { 1620 if (cn->bh && can_dirty(cn) && buffer_dirty(cn->bh)) {
1612 struct buffer_head *tmp_bh; 1621 struct buffer_head *tmp_bh;
1613 /* we can race against journal_mark_freed when we try 1622 /* we can race against journal_mark_freed when we try
1614 * to lock_buffer(cn->bh), so we have to inc the buffer 1623 * to lock_buffer(cn->bh), so we have to inc the buffer
1615 * count, and recheck things after locking 1624 * count, and recheck things after locking
1616 */ 1625 */
1617 tmp_bh = cn->bh; 1626 tmp_bh = cn->bh;
1618 get_bh(tmp_bh); 1627 get_bh(tmp_bh);
1619 lock_buffer(tmp_bh); 1628 lock_buffer(tmp_bh);
1620 if (cn->bh && can_dirty(cn) && buffer_dirty(tmp_bh)) { 1629 if (cn->bh && can_dirty(cn) && buffer_dirty(tmp_bh)) {
1621 if (!buffer_journal_dirty(tmp_bh) || 1630 if (!buffer_journal_dirty(tmp_bh) ||
1622 buffer_journal_prepared(tmp_bh)) 1631 buffer_journal_prepared(tmp_bh))
1623 BUG(); 1632 BUG();
1624 add_to_chunk(chunk, tmp_bh, NULL, write_chunk); 1633 add_to_chunk(chunk, tmp_bh, NULL, write_chunk);
1625 ret++; 1634 ret++;
1626 } else { 1635 } else {
1627 /* note, cn->bh might be null now */ 1636 /* note, cn->bh might be null now */
1628 unlock_buffer(tmp_bh); 1637 unlock_buffer(tmp_bh);
1629 } 1638 }
1630 put_bh(tmp_bh); 1639 put_bh(tmp_bh);
1631 } 1640 }
1632 next: 1641 next:
1633 cn = cn->next; 1642 cn = cn->next;
1634 cond_resched(); 1643 cond_resched();
1635 } 1644 }
1636 return ret; 1645 return ret;
1637 } 1646 }
1638 1647
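write_one_transaction() above never submits I/O one buffer at a time; it queues dirty buffers into a fixed-size chunk that is flushed in a single batch, which is the simple batching this commit moves into the journal code. A self-contained sketch of the pattern, with CHUNK_SIZE and the printf as stand-ins for the kernel's buffer_chunk capacity and its write_chunk() submission:

    #include <stdio.h>

    #define CHUNK_SIZE 64 /* illustrative capacity, not the kernel's */

    struct chunk {
            void *items[CHUNK_SIZE];
            int nr;
    };

    static void flush_chunk(struct chunk *c)
    {
            if (c->nr)
                    printf("submitting %d buffers in one batch\n", c->nr);
            c->nr = 0;
    }

    static void add_to_chunk(struct chunk *c, void *item)
    {
            c->items[c->nr++] = item;
            if (c->nr == CHUNK_SIZE)
                    flush_chunk(c); /* auto-flush once the chunk fills */
    }

    int main(void)
    {
            struct chunk c = { .nr = 0 };
            int dummy[100];

            for (int i = 0; i < 100; i++)
                    add_to_chunk(&c, &dummy[i]); /* flushes once at 64 */
            flush_chunk(&c);                     /* flushes the final 36 */
            return 0;
    }
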
1639 /* used by flush_commit_list */ 1648 /* used by flush_commit_list */
1640 static int dirty_one_transaction(struct super_block *s, 1649 static int dirty_one_transaction(struct super_block *s,
1641 struct reiserfs_journal_list *jl) 1650 struct reiserfs_journal_list *jl)
1642 { 1651 {
1643 struct reiserfs_journal_cnode *cn; 1652 struct reiserfs_journal_cnode *cn;
1644 struct reiserfs_journal_list *pjl; 1653 struct reiserfs_journal_list *pjl;
1645 int ret = 0; 1654 int ret = 0;
1646 1655
1647 jl->j_state |= LIST_DIRTY; 1656 jl->j_state |= LIST_DIRTY;
1648 cn = jl->j_realblock; 1657 cn = jl->j_realblock;
1649 while (cn) { 1658 while (cn) {
1650 /* look for a more recent transaction that logged this 1659 /* look for a more recent transaction that logged this
1651 ** buffer. Only the most recent transaction with a buffer in 1660 ** buffer. Only the most recent transaction with a buffer in
1652 ** it is allowed to send that buffer to disk 1661 ** it is allowed to send that buffer to disk
1653 */ 1662 */
1654 pjl = find_newer_jl_for_cn(cn); 1663 pjl = find_newer_jl_for_cn(cn);
1655 if (!pjl && cn->blocknr && cn->bh 1664 if (!pjl && cn->blocknr && cn->bh
1656 && buffer_journal_dirty(cn->bh)) { 1665 && buffer_journal_dirty(cn->bh)) {
1657 BUG_ON(!can_dirty(cn)); 1666 BUG_ON(!can_dirty(cn));
1658 /* if the buffer is prepared, it will either be logged 1667 /* if the buffer is prepared, it will either be logged
1659 * or restored. If restored, we need to make sure 1668 * or restored. If restored, we need to make sure
1660 * it actually gets marked dirty 1669 * it actually gets marked dirty
1661 */ 1670 */
1662 clear_buffer_journal_new(cn->bh); 1671 clear_buffer_journal_new(cn->bh);
1663 if (buffer_journal_prepared(cn->bh)) { 1672 if (buffer_journal_prepared(cn->bh)) {
1664 set_buffer_journal_restore_dirty(cn->bh); 1673 set_buffer_journal_restore_dirty(cn->bh);
1665 } else { 1674 } else {
1666 set_buffer_journal_test(cn->bh); 1675 set_buffer_journal_test(cn->bh);
1667 mark_buffer_dirty(cn->bh); 1676 mark_buffer_dirty(cn->bh);
1668 } 1677 }
1669 } 1678 }
1670 cn = cn->next; 1679 cn = cn->next;
1671 } 1680 }
1672 return ret; 1681 return ret;
1673 } 1682 }
1674 1683
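The find_newer_jl_for_cn() check above enforces a "newest owner writes" rule: when several transactions have logged the same block, only the most recent one may send it to disk. A toy user-space model of that rule; the struct and names are illustrative, not reiserfs types:

    #include <stdio.h>

    struct logged_block {
            unsigned long blocknr;
            unsigned long trans_id; /* transaction that logged it */
    };

    /* may transaction 'id' write block 'blocknr'? */
    static int may_write(const struct logged_block *log, int n,
                         unsigned long blocknr, unsigned long id)
    {
            for (int i = 0; i < n; i++)
                    if (log[i].blocknr == blocknr && log[i].trans_id > id)
                            return 0; /* a newer transaction owns it */
            return 1;
    }

    int main(void)
    {
            struct logged_block log[] = { { 100, 7 }, { 100, 9 }, { 200, 7 } };

            printf("%d %d\n", may_write(log, 3, 100, 7),  /* 0: trans 9 is newer */
                   may_write(log, 3, 200, 7));            /* 1 */
            return 0;
    }
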
1675 static int kupdate_transactions(struct super_block *s, 1684 static int kupdate_transactions(struct super_block *s,
1676 struct reiserfs_journal_list *jl, 1685 struct reiserfs_journal_list *jl,
1677 struct reiserfs_journal_list **next_jl, 1686 struct reiserfs_journal_list **next_jl,
1678 unsigned long *next_trans_id, 1687 unsigned long *next_trans_id,
1679 int num_blocks, int num_trans) 1688 int num_blocks, int num_trans)
1680 { 1689 {
1681 int ret = 0; 1690 int ret = 0;
1682 int written = 0; 1691 int written = 0;
1683 int transactions_flushed = 0; 1692 int transactions_flushed = 0;
1684 unsigned long orig_trans_id = jl->j_trans_id; 1693 unsigned long orig_trans_id = jl->j_trans_id;
1685 struct buffer_chunk chunk; 1694 struct buffer_chunk chunk;
1686 struct list_head *entry; 1695 struct list_head *entry;
1687 struct reiserfs_journal *journal = SB_JOURNAL(s); 1696 struct reiserfs_journal *journal = SB_JOURNAL(s);
1688 chunk.nr = 0; 1697 chunk.nr = 0;
1689 1698
1690 down(&journal->j_flush_sem); 1699 down(&journal->j_flush_sem);
1691 if (!journal_list_still_alive(s, orig_trans_id)) { 1700 if (!journal_list_still_alive(s, orig_trans_id)) {
1692 goto done; 1701 goto done;
1693 } 1702 }
1694 1703
1695 /* we've got j_flush_sem held, nobody is going to delete any 1704 /* we've got j_flush_sem held, nobody is going to delete any
1696 * of these lists out from underneath us 1705 * of these lists out from underneath us
1697 */ 1706 */
1698 while ((num_trans && transactions_flushed < num_trans) || 1707 while ((num_trans && transactions_flushed < num_trans) ||
1699 (!num_trans && written < num_blocks)) { 1708 (!num_trans && written < num_blocks)) {
1700 1709
1701 if (jl->j_len == 0 || (jl->j_state & LIST_TOUCHED) || 1710 if (jl->j_len == 0 || (jl->j_state & LIST_TOUCHED) ||
1702 atomic_read(&jl->j_commit_left) 1711 atomic_read(&jl->j_commit_left)
1703 || !(jl->j_state & LIST_DIRTY)) { 1712 || !(jl->j_state & LIST_DIRTY)) {
1704 del_from_work_list(s, jl); 1713 del_from_work_list(s, jl);
1705 break; 1714 break;
1706 } 1715 }
1707 ret = write_one_transaction(s, jl, &chunk); 1716 ret = write_one_transaction(s, jl, &chunk);
1708 1717
1709 if (ret < 0) 1718 if (ret < 0)
1710 goto done; 1719 goto done;
1711 transactions_flushed++; 1720 transactions_flushed++;
1712 written += ret; 1721 written += ret;
1713 entry = jl->j_list.next; 1722 entry = jl->j_list.next;
1714 1723
1715 /* did we wrap? */ 1724 /* did we wrap? */
1716 if (entry == &journal->j_journal_list) { 1725 if (entry == &journal->j_journal_list) {
1717 break; 1726 break;
1718 } 1727 }
1719 jl = JOURNAL_LIST_ENTRY(entry); 1728 jl = JOURNAL_LIST_ENTRY(entry);
1720 1729
1721 /* don't bother with older transactions */ 1730 /* don't bother with older transactions */
1722 if (jl->j_trans_id <= orig_trans_id) 1731 if (jl->j_trans_id <= orig_trans_id)
1723 break; 1732 break;
1724 } 1733 }
1725 if (chunk.nr) { 1734 if (chunk.nr) {
1726 write_chunk(&chunk); 1735 write_chunk(&chunk);
1727 } 1736 }
1728 1737
1729 done: 1738 done:
1730 up(&journal->j_flush_sem); 1739 up(&journal->j_flush_sem);
1731 return ret; 1740 return ret;
1732 } 1741 }
1733 1742
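The while condition above runs the flusher under one of two budgets: a transaction count when num_trans is non-zero, otherwise a count of blocks written. Condensed into a standalone predicate (a sketch, not the kernel's code):

    static int keep_flushing(int num_trans, int transactions_flushed,
                             int num_blocks, int written)
    {
            /* the transaction budget takes precedence when it is given */
            return (num_trans && transactions_flushed < num_trans) ||
                   (!num_trans && written < num_blocks);
    }
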
1734 /* o_sync and fsync heavy applications tend to use 1743 /* o_sync and fsync heavy applications tend to use
1735 ** all the journal list slots with tiny transactions. These 1744 ** all the journal list slots with tiny transactions. These
1736 ** trigger lots and lots of calls to update the header block, which 1745 ** trigger lots and lots of calls to update the header block, which
1737 ** adds seeks and slows things down. 1746 ** adds seeks and slows things down.
1738 ** 1747 **
1739 ** This function tries to clear out a large chunk of the journal lists 1748 ** This function tries to clear out a large chunk of the journal lists
1740 ** at once, which makes everything faster since only the newest journal 1749 ** at once, which makes everything faster since only the newest journal
1741 ** list updates the header block 1750 ** list updates the header block
1742 */ 1751 */
1743 static int flush_used_journal_lists(struct super_block *s, 1752 static int flush_used_journal_lists(struct super_block *s,
1744 struct reiserfs_journal_list *jl) 1753 struct reiserfs_journal_list *jl)
1745 { 1754 {
1746 unsigned long len = 0; 1755 unsigned long len = 0;
1747 unsigned long cur_len; 1756 unsigned long cur_len;
1748 int ret; 1757 int ret;
1749 int i; 1758 int i;
1750 int limit = 256; 1759 int limit = 256;
1751 struct reiserfs_journal_list *tjl; 1760 struct reiserfs_journal_list *tjl;
1752 struct reiserfs_journal_list *flush_jl; 1761 struct reiserfs_journal_list *flush_jl;
1753 unsigned long trans_id; 1762 unsigned long trans_id;
1754 struct reiserfs_journal *journal = SB_JOURNAL(s); 1763 struct reiserfs_journal *journal = SB_JOURNAL(s);
1755 1764
1756 flush_jl = tjl = jl; 1765 flush_jl = tjl = jl;
1757 1766
1758 /* in data logging mode, try harder to flush a lot of blocks */ 1767 /* in data logging mode, try harder to flush a lot of blocks */
1759 if (reiserfs_data_log(s)) 1768 if (reiserfs_data_log(s))
1760 limit = 1024; 1769 limit = 1024;
1761 /* flush for 256 transactions or limit blocks, whichever comes first */ 1770 /* flush for 256 transactions or limit blocks, whichever comes first */
1762 for (i = 0; i < 256 && len < limit; i++) { 1771 for (i = 0; i < 256 && len < limit; i++) {
1763 if (atomic_read(&tjl->j_commit_left) || 1772 if (atomic_read(&tjl->j_commit_left) ||
1764 tjl->j_trans_id < jl->j_trans_id) { 1773 tjl->j_trans_id < jl->j_trans_id) {
1765 break; 1774 break;
1766 } 1775 }
1767 cur_len = atomic_read(&tjl->j_nonzerolen); 1776 cur_len = atomic_read(&tjl->j_nonzerolen);
1768 if (cur_len > 0) { 1777 if (cur_len > 0) {
1769 tjl->j_state &= ~LIST_TOUCHED; 1778 tjl->j_state &= ~LIST_TOUCHED;
1770 } 1779 }
1771 len += cur_len; 1780 len += cur_len;
1772 flush_jl = tjl; 1781 flush_jl = tjl;
1773 if (tjl->j_list.next == &journal->j_journal_list) 1782 if (tjl->j_list.next == &journal->j_journal_list)
1774 break; 1783 break;
1775 tjl = JOURNAL_LIST_ENTRY(tjl->j_list.next); 1784 tjl = JOURNAL_LIST_ENTRY(tjl->j_list.next);
1776 } 1785 }
1777 /* try to find a group of blocks we can flush across all the 1786 /* try to find a group of blocks we can flush across all the
1778 ** transactions, but only bother if we've actually spanned 1787 ** transactions, but only bother if we've actually spanned
1779 ** across multiple lists 1788 ** across multiple lists
1780 */ 1789 */
1781 if (flush_jl != jl) { 1790 if (flush_jl != jl) {
1782 ret = kupdate_transactions(s, jl, &tjl, &trans_id, len, i); 1791 ret = kupdate_transactions(s, jl, &tjl, &trans_id, len, i);
1783 } 1792 }
1784 flush_journal_list(s, flush_jl, 1); 1793 flush_journal_list(s, flush_jl, 1);
1785 return 0; 1794 return 0;
1786 } 1795 }
1787 1796
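The scan above sums j_nonzerolen across consecutive committed lists, stops after 256 transactions or limit blocks, and then flushes everything up to the newest list reached, so the header block is written once for the whole batch. A small user-space model of the accumulation, with plain arrays standing in for the journal lists:

    #include <stdio.h>

    int main(void)
    {
            unsigned long nonzerolen[] = { 10, 40, 300, 900, 20 };
            int n = 5, limit = 1024;
            unsigned long len = 0;
            int i, flush_idx = 0;

            for (i = 0; i < n && i < 256 && len < limit; i++) {
                    len += nonzerolen[i];
                    flush_idx = i; /* newest list reached so far */
            }
            /* one flush covers lists 0..flush_idx, one header update */
            printf("flush through list %d (%lu blocks)\n", flush_idx, len);
            return 0;
    }
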
1788 /* 1797 /*
1789 ** removes any nodes in table whose block and dev match bh. 1798 ** removes any nodes in table whose block and dev match bh.
1790 ** only touches the hnext and hprev pointers. 1799 ** only touches the hnext and hprev pointers.
1791 */ 1800 */
1792 void remove_journal_hash(struct super_block *sb, 1801 void remove_journal_hash(struct super_block *sb,
1793 struct reiserfs_journal_cnode **table, 1802 struct reiserfs_journal_cnode **table,
1794 struct reiserfs_journal_list *jl, 1803 struct reiserfs_journal_list *jl,
1795 unsigned long block, int remove_freed) 1804 unsigned long block, int remove_freed)
1796 { 1805 {
1797 struct reiserfs_journal_cnode *cur; 1806 struct reiserfs_journal_cnode *cur;
1798 struct reiserfs_journal_cnode **head; 1807 struct reiserfs_journal_cnode **head;
1799 1808
1800 head = &(journal_hash(table, sb, block)); 1809 head = &(journal_hash(table, sb, block));
1801 if (!head) { 1810 if (!head) {
1802 return; 1811 return;
1803 } 1812 }
1804 cur = *head; 1813 cur = *head;
1805 while (cur) { 1814 while (cur) {
1806 if (cur->blocknr == block && cur->sb == sb 1815 if (cur->blocknr == block && cur->sb == sb
1807 && (jl == NULL || jl == cur->jlist) 1816 && (jl == NULL || jl == cur->jlist)
1808 && (!test_bit(BLOCK_FREED, &cur->state) || remove_freed)) { 1817 && (!test_bit(BLOCK_FREED, &cur->state) || remove_freed)) {
1809 if (cur->hnext) { 1818 if (cur->hnext) {
1810 cur->hnext->hprev = cur->hprev; 1819 cur->hnext->hprev = cur->hprev;
1811 } 1820 }
1812 if (cur->hprev) { 1821 if (cur->hprev) {
1813 cur->hprev->hnext = cur->hnext; 1822 cur->hprev->hnext = cur->hnext;
1814 } else { 1823 } else {
1815 *head = cur->hnext; 1824 *head = cur->hnext;
1816 } 1825 }
1817 cur->blocknr = 0; 1826 cur->blocknr = 0;
1818 cur->sb = NULL; 1827 cur->sb = NULL;
1819 cur->state = 0; 1828 cur->state = 0;
1820 if (cur->bh && cur->jlist) /* anybody who clears the cur->bh will also dec the nonzerolen */ 1829 if (cur->bh && cur->jlist) /* anybody who clears the cur->bh will also dec the nonzerolen */
1821 atomic_dec(&(cur->jlist->j_nonzerolen)); 1830 atomic_dec(&(cur->jlist->j_nonzerolen));
1822 cur->bh = NULL; 1831 cur->bh = NULL;
1823 cur->jlist = NULL; 1832 cur->jlist = NULL;
1824 } 1833 }
1825 cur = cur->hnext; 1834 cur = cur->hnext;
1826 } 1835 }
1827 } 1836 }
1828 1837
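The unlink above is a standard doubly linked hash-chain splice: patch the neighbours together and, when the node was first in its bucket, move the bucket head forward. A self-contained model with field names mirroring hnext/hprev:

    #include <stddef.h>

    struct node {
            struct node *hnext;
            struct node *hprev;
    };

    static void unlink_node(struct node **head, struct node *cur)
    {
            if (cur->hnext)
                    cur->hnext->hprev = cur->hprev;
            if (cur->hprev)
                    cur->hprev->hnext = cur->hnext;
            else
                    *head = cur->hnext; /* cur was the bucket head */
            cur->hnext = cur->hprev = NULL;
    }
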
1829 static void free_journal_ram(struct super_block *p_s_sb) 1838 static void free_journal_ram(struct super_block *p_s_sb)
1830 { 1839 {
1831 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 1840 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
1832 kfree(journal->j_current_jl); 1841 kfree(journal->j_current_jl);
1833 journal->j_num_lists--; 1842 journal->j_num_lists--;
1834 1843
1835 vfree(journal->j_cnode_free_orig); 1844 vfree(journal->j_cnode_free_orig);
1836 free_list_bitmaps(p_s_sb, journal->j_list_bitmap); 1845 free_list_bitmaps(p_s_sb, journal->j_list_bitmap);
1837 free_bitmap_nodes(p_s_sb); /* must be after free_list_bitmaps */ 1846 free_bitmap_nodes(p_s_sb); /* must be after free_list_bitmaps */
1838 if (journal->j_header_bh) { 1847 if (journal->j_header_bh) {
1839 brelse(journal->j_header_bh); 1848 brelse(journal->j_header_bh);
1840 } 1849 }
1841 /* j_header_bh is on the journal dev, make sure not to release the journal 1850 /* j_header_bh is on the journal dev, make sure not to release the journal
1842 * dev until we brelse j_header_bh 1851 * dev until we brelse j_header_bh
1843 */ 1852 */
1844 release_journal_dev(p_s_sb, journal); 1853 release_journal_dev(p_s_sb, journal);
1845 vfree(journal); 1854 vfree(journal);
1846 } 1855 }
1847 1856
1848 /* 1857 /*
1849 ** call on unmount. Only set error to 1 if you haven't made your way out 1858 ** call on unmount. Only set error to 1 if you haven't made your way out
1850 ** of read_super() yet. Any other caller must keep error at 0. 1859 ** of read_super() yet. Any other caller must keep error at 0.
1851 */ 1860 */
1852 static int do_journal_release(struct reiserfs_transaction_handle *th, 1861 static int do_journal_release(struct reiserfs_transaction_handle *th,
1853 struct super_block *p_s_sb, int error) 1862 struct super_block *p_s_sb, int error)
1854 { 1863 {
1855 struct reiserfs_transaction_handle myth; 1864 struct reiserfs_transaction_handle myth;
1856 int flushed = 0; 1865 int flushed = 0;
1857 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 1866 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
1858 1867
1859 /* we only want to flush out transactions if we were called with error == 0 1868 /* we only want to flush out transactions if we were called with error == 0
1860 */ 1869 */
1861 if (!error && !(p_s_sb->s_flags & MS_RDONLY)) { 1870 if (!error && !(p_s_sb->s_flags & MS_RDONLY)) {
1862 /* end the current trans */ 1871 /* end the current trans */
1863 BUG_ON(!th->t_trans_id); 1872 BUG_ON(!th->t_trans_id);
1864 do_journal_end(th, p_s_sb, 10, FLUSH_ALL); 1873 do_journal_end(th, p_s_sb, 10, FLUSH_ALL);
1865 1874
1866 /* make sure something gets logged to force our way into the flush code */ 1875 /* make sure something gets logged to force our way into the flush code */
1867 if (!journal_join(&myth, p_s_sb, 1)) { 1876 if (!journal_join(&myth, p_s_sb, 1)) {
1868 reiserfs_prepare_for_journal(p_s_sb, 1877 reiserfs_prepare_for_journal(p_s_sb,
1869 SB_BUFFER_WITH_SB(p_s_sb), 1878 SB_BUFFER_WITH_SB(p_s_sb),
1870 1); 1879 1);
1871 journal_mark_dirty(&myth, p_s_sb, 1880 journal_mark_dirty(&myth, p_s_sb,
1872 SB_BUFFER_WITH_SB(p_s_sb)); 1881 SB_BUFFER_WITH_SB(p_s_sb));
1873 do_journal_end(&myth, p_s_sb, 1, FLUSH_ALL); 1882 do_journal_end(&myth, p_s_sb, 1, FLUSH_ALL);
1874 flushed = 1; 1883 flushed = 1;
1875 } 1884 }
1876 } 1885 }
1877 1886
1878 /* this also catches errors during the do_journal_end above */ 1887 /* this also catches errors during the do_journal_end above */
1879 if (!error && reiserfs_is_journal_aborted(journal)) { 1888 if (!error && reiserfs_is_journal_aborted(journal)) {
1880 memset(&myth, 0, sizeof(myth)); 1889 memset(&myth, 0, sizeof(myth));
1881 if (!journal_join_abort(&myth, p_s_sb, 1)) { 1890 if (!journal_join_abort(&myth, p_s_sb, 1)) {
1882 reiserfs_prepare_for_journal(p_s_sb, 1891 reiserfs_prepare_for_journal(p_s_sb,
1883 SB_BUFFER_WITH_SB(p_s_sb), 1892 SB_BUFFER_WITH_SB(p_s_sb),
1884 1); 1893 1);
1885 journal_mark_dirty(&myth, p_s_sb, 1894 journal_mark_dirty(&myth, p_s_sb,
1886 SB_BUFFER_WITH_SB(p_s_sb)); 1895 SB_BUFFER_WITH_SB(p_s_sb));
1887 do_journal_end(&myth, p_s_sb, 1, FLUSH_ALL); 1896 do_journal_end(&myth, p_s_sb, 1, FLUSH_ALL);
1888 } 1897 }
1889 } 1898 }
1890 1899
1891 reiserfs_mounted_fs_count--; 1900 reiserfs_mounted_fs_count--;
1892 /* wait for all commits to finish */ 1901 /* wait for all commits to finish */
1893 cancel_delayed_work(&SB_JOURNAL(p_s_sb)->j_work); 1902 cancel_delayed_work(&SB_JOURNAL(p_s_sb)->j_work);
1894 flush_workqueue(commit_wq); 1903 flush_workqueue(commit_wq);
1895 if (!reiserfs_mounted_fs_count) { 1904 if (!reiserfs_mounted_fs_count) {
1896 destroy_workqueue(commit_wq); 1905 destroy_workqueue(commit_wq);
1897 commit_wq = NULL; 1906 commit_wq = NULL;
1898 } 1907 }
1899 1908
1900 free_journal_ram(p_s_sb); 1909 free_journal_ram(p_s_sb);
1901 1910
1902 return 0; 1911 return 0;
1903 } 1912 }
1904 1913
1905 /* 1914 /*
1906 ** call on unmount. flush all journal trans, release all alloc'd ram 1915 ** call on unmount. flush all journal trans, release all alloc'd ram
1907 */ 1916 */
1908 int journal_release(struct reiserfs_transaction_handle *th, 1917 int journal_release(struct reiserfs_transaction_handle *th,
1909 struct super_block *p_s_sb) 1918 struct super_block *p_s_sb)
1910 { 1919 {
1911 return do_journal_release(th, p_s_sb, 0); 1920 return do_journal_release(th, p_s_sb, 0);
1912 } 1921 }
1913 1922
1914 /* 1923 /*
1915 ** only call from an error condition inside reiserfs_read_super! 1924 ** only call from an error condition inside reiserfs_read_super!
1916 */ 1925 */
1917 int journal_release_error(struct reiserfs_transaction_handle *th, 1926 int journal_release_error(struct reiserfs_transaction_handle *th,
1918 struct super_block *p_s_sb) 1927 struct super_block *p_s_sb)
1919 { 1928 {
1920 return do_journal_release(th, p_s_sb, 1); 1929 return do_journal_release(th, p_s_sb, 1);
1921 } 1930 }
1922 1931
1923 /* compares description block with commit block. returns 1 if they differ, 0 if they are the same */ 1932 /* compares description block with commit block. returns 1 if they differ, 0 if they are the same */
1924 static int journal_compare_desc_commit(struct super_block *p_s_sb, 1933 static int journal_compare_desc_commit(struct super_block *p_s_sb,
1925 struct reiserfs_journal_desc *desc, 1934 struct reiserfs_journal_desc *desc,
1926 struct reiserfs_journal_commit *commit) 1935 struct reiserfs_journal_commit *commit)
1927 { 1936 {
1928 if (get_commit_trans_id(commit) != get_desc_trans_id(desc) || 1937 if (get_commit_trans_id(commit) != get_desc_trans_id(desc) ||
1929 get_commit_trans_len(commit) != get_desc_trans_len(desc) || 1938 get_commit_trans_len(commit) != get_desc_trans_len(desc) ||
1930 get_commit_trans_len(commit) > SB_JOURNAL(p_s_sb)->j_trans_max || 1939 get_commit_trans_len(commit) > SB_JOURNAL(p_s_sb)->j_trans_max ||
1931 get_commit_trans_len(commit) <= 0) { 1940 get_commit_trans_len(commit) <= 0) {
1932 return 1; 1941 return 1;
1933 } 1942 }
1934 return 0; 1943 return 0;
1935 } 1944 }
1936 1945
1937 /* returns 0 if it did not find a description block 1946 /* returns 0 if it did not find a description block
1938 ** returns -1 if it found a corrupt commit block 1947 ** returns -1 if it found a corrupt commit block
1939 ** returns 1 if both desc and commit were valid 1948 ** returns 1 if both desc and commit were valid
1940 */ 1949 */
1941 static int journal_transaction_is_valid(struct super_block *p_s_sb, 1950 static int journal_transaction_is_valid(struct super_block *p_s_sb,
1942 struct buffer_head *d_bh, 1951 struct buffer_head *d_bh,
1943 unsigned long *oldest_invalid_trans_id, 1952 unsigned long *oldest_invalid_trans_id,
1944 unsigned long *newest_mount_id) 1953 unsigned long *newest_mount_id)
1945 { 1954 {
1946 struct reiserfs_journal_desc *desc; 1955 struct reiserfs_journal_desc *desc;
1947 struct reiserfs_journal_commit *commit; 1956 struct reiserfs_journal_commit *commit;
1948 struct buffer_head *c_bh; 1957 struct buffer_head *c_bh;
1949 unsigned long offset; 1958 unsigned long offset;
1950 1959
1951 if (!d_bh) 1960 if (!d_bh)
1952 return 0; 1961 return 0;
1953 1962
1954 desc = (struct reiserfs_journal_desc *)d_bh->b_data; 1963 desc = (struct reiserfs_journal_desc *)d_bh->b_data;
1955 if (get_desc_trans_len(desc) > 0 1964 if (get_desc_trans_len(desc) > 0
1956 && !memcmp(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8)) { 1965 && !memcmp(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8)) {
1957 if (oldest_invalid_trans_id && *oldest_invalid_trans_id 1966 if (oldest_invalid_trans_id && *oldest_invalid_trans_id
1958 && get_desc_trans_id(desc) > *oldest_invalid_trans_id) { 1967 && get_desc_trans_id(desc) > *oldest_invalid_trans_id) {
1959 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 1968 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
1960 "journal-986: transaction " 1969 "journal-986: transaction "
1961 "is valid returning because trans_id %d is greater than " 1970 "is valid returning because trans_id %d is greater than "
1962 "oldest_invalid %lu", 1971 "oldest_invalid %lu",
1963 get_desc_trans_id(desc), 1972 get_desc_trans_id(desc),
1964 *oldest_invalid_trans_id); 1973 *oldest_invalid_trans_id);
1965 return 0; 1974 return 0;
1966 } 1975 }
1967 if (newest_mount_id 1976 if (newest_mount_id
1968 && *newest_mount_id > get_desc_mount_id(desc)) { 1977 && *newest_mount_id > get_desc_mount_id(desc)) {
1969 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 1978 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
1970 "journal-1087: transaction " 1979 "journal-1087: transaction "
1971 "is valid returning because mount_id %d is less than " 1980 "is valid returning because mount_id %d is less than "
1972 "newest_mount_id %lu", 1981 "newest_mount_id %lu",
1973 get_desc_mount_id(desc), 1982 get_desc_mount_id(desc),
1974 *newest_mount_id); 1983 *newest_mount_id);
1975 return -1; 1984 return -1;
1976 } 1985 }
1977 if (get_desc_trans_len(desc) > SB_JOURNAL(p_s_sb)->j_trans_max) { 1986 if (get_desc_trans_len(desc) > SB_JOURNAL(p_s_sb)->j_trans_max) {
1978 reiserfs_warning(p_s_sb, 1987 reiserfs_warning(p_s_sb,
1979 "journal-2018: Bad transaction length %d encountered, ignoring transaction", 1988 "journal-2018: Bad transaction length %d encountered, ignoring transaction",
1980 get_desc_trans_len(desc)); 1989 get_desc_trans_len(desc));
1981 return -1; 1990 return -1;
1982 } 1991 }
1983 offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb); 1992 offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb);
1984 1993
1985 /* ok, we have a journal description block, let's see if the transaction was valid */ 1994 /* ok, we have a journal description block, let's see if the transaction was valid */
1986 c_bh = 1995 c_bh =
1987 journal_bread(p_s_sb, 1996 journal_bread(p_s_sb,
1988 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 1997 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
1989 ((offset + get_desc_trans_len(desc) + 1998 ((offset + get_desc_trans_len(desc) +
1990 1) % SB_ONDISK_JOURNAL_SIZE(p_s_sb))); 1999 1) % SB_ONDISK_JOURNAL_SIZE(p_s_sb)));
1991 if (!c_bh) 2000 if (!c_bh)
1992 return 0; 2001 return 0;
1993 commit = (struct reiserfs_journal_commit *)c_bh->b_data; 2002 commit = (struct reiserfs_journal_commit *)c_bh->b_data;
1994 if (journal_compare_desc_commit(p_s_sb, desc, commit)) { 2003 if (journal_compare_desc_commit(p_s_sb, desc, commit)) {
1995 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2004 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
1996 "journal_transaction_is_valid, commit offset %ld had bad " 2005 "journal_transaction_is_valid, commit offset %ld had bad "
1997 "time %d or length %d", 2006 "time %d or length %d",
1998 c_bh->b_blocknr - 2007 c_bh->b_blocknr -
1999 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), 2008 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb),
2000 get_commit_trans_id(commit), 2009 get_commit_trans_id(commit),
2001 get_commit_trans_len(commit)); 2010 get_commit_trans_len(commit));
2002 brelse(c_bh); 2011 brelse(c_bh);
2003 if (oldest_invalid_trans_id) { 2012 if (oldest_invalid_trans_id) {
2004 *oldest_invalid_trans_id = 2013 *oldest_invalid_trans_id =
2005 get_desc_trans_id(desc); 2014 get_desc_trans_id(desc);
2006 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2015 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
2007 "journal-1004: " 2016 "journal-1004: "
2008 "transaction_is_valid setting oldest invalid trans_id " 2017 "transaction_is_valid setting oldest invalid trans_id "
2009 "to %d", 2018 "to %d",
2010 get_desc_trans_id(desc)); 2019 get_desc_trans_id(desc));
2011 } 2020 }
2012 return -1; 2021 return -1;
2013 } 2022 }
2014 brelse(c_bh); 2023 brelse(c_bh);
2015 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2024 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
2016 "journal-1006: found valid " 2025 "journal-1006: found valid "
2017 "transaction start offset %llu, len %d id %d", 2026 "transaction start offset %llu, len %d id %d",
2018 d_bh->b_blocknr - 2027 d_bh->b_blocknr -
2019 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), 2028 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb),
2020 get_desc_trans_len(desc), 2029 get_desc_trans_len(desc),
2021 get_desc_trans_id(desc)); 2030 get_desc_trans_id(desc));
2022 return 1; 2031 return 1;
2023 } else { 2032 } else {
2024 return 0; 2033 return 0;
2025 } 2034 }
2026 } 2035 }
2027 2036
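A transaction is accepted only when the commit block found at (desc_offset + trans_len + 1) % journal_size echoes the description block's trans_id and carries a sane length, which is exactly what journal_compare_desc_commit() rejects. A toy version of the cross-check; the struct fields are stand-ins for the on-disk reiserfs_journal_desc/commit layouts:

    #include <stdio.h>

    struct desc   { unsigned long trans_id; int trans_len; };
    struct commit { unsigned long trans_id; int trans_len; };

    static int transaction_valid(const struct desc *d,
                                 const struct commit *c, int trans_max)
    {
            if (c->trans_id != d->trans_id || c->trans_len != d->trans_len)
                    return 0; /* torn or mismatched transaction */
            if (c->trans_len <= 0 || c->trans_len > trans_max)
                    return 0; /* implausible length */
            return 1;
    }

    int main(void)
    {
            struct desc d = { 42, 8 };
            struct commit good = { 42, 8 }, torn = { 41, 8 };

            printf("%d %d\n", transaction_valid(&d, &good, 1024),  /* 1 */
                   transaction_valid(&d, &torn, 1024));            /* 0 */
            return 0;
    }
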
2028 static void brelse_array(struct buffer_head **heads, int num) 2037 static void brelse_array(struct buffer_head **heads, int num)
2029 { 2038 {
2030 int i; 2039 int i;
2031 for (i = 0; i < num; i++) { 2040 for (i = 0; i < num; i++) {
2032 brelse(heads[i]); 2041 brelse(heads[i]);
2033 } 2042 }
2034 } 2043 }
2035 2044
2036 /* 2045 /*
2037 ** given the start, and values for the oldest acceptable transactions, 2046 ** given the start, and values for the oldest acceptable transactions,
2038 ** this either reads in and replays a transaction, or returns because the transaction 2047 ** this either reads in and replays a transaction, or returns because the transaction
2039 ** is invalid, or too old. 2048 ** is invalid, or too old.
2040 */ 2049 */
2041 static int journal_read_transaction(struct super_block *p_s_sb, 2050 static int journal_read_transaction(struct super_block *p_s_sb,
2042 unsigned long cur_dblock, 2051 unsigned long cur_dblock,
2043 unsigned long oldest_start, 2052 unsigned long oldest_start,
2044 unsigned long oldest_trans_id, 2053 unsigned long oldest_trans_id,
2045 unsigned long newest_mount_id) 2054 unsigned long newest_mount_id)
2046 { 2055 {
2047 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 2056 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
2048 struct reiserfs_journal_desc *desc; 2057 struct reiserfs_journal_desc *desc;
2049 struct reiserfs_journal_commit *commit; 2058 struct reiserfs_journal_commit *commit;
2050 unsigned long trans_id = 0; 2059 unsigned long trans_id = 0;
2051 struct buffer_head *c_bh; 2060 struct buffer_head *c_bh;
2052 struct buffer_head *d_bh; 2061 struct buffer_head *d_bh;
2053 struct buffer_head **log_blocks = NULL; 2062 struct buffer_head **log_blocks = NULL;
2054 struct buffer_head **real_blocks = NULL; 2063 struct buffer_head **real_blocks = NULL;
2055 unsigned long trans_offset; 2064 unsigned long trans_offset;
2056 int i; 2065 int i;
2057 int trans_half; 2066 int trans_half;
2058 2067
2059 d_bh = journal_bread(p_s_sb, cur_dblock); 2068 d_bh = journal_bread(p_s_sb, cur_dblock);
2060 if (!d_bh) 2069 if (!d_bh)
2061 return 1; 2070 return 1;
2062 desc = (struct reiserfs_journal_desc *)d_bh->b_data; 2071 desc = (struct reiserfs_journal_desc *)d_bh->b_data;
2063 trans_offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb); 2072 trans_offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb);
2064 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1037: " 2073 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1037: "
2065 "journal_read_transaction, offset %llu, len %d mount_id %d", 2074 "journal_read_transaction, offset %llu, len %d mount_id %d",
2066 d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), 2075 d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb),
2067 get_desc_trans_len(desc), get_desc_mount_id(desc)); 2076 get_desc_trans_len(desc), get_desc_mount_id(desc));
2068 if (get_desc_trans_id(desc) < oldest_trans_id) { 2077 if (get_desc_trans_id(desc) < oldest_trans_id) {
2069 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1039: " 2078 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1039: "
2070 "journal_read_trans skipping because %lu is too old", 2079 "journal_read_trans skipping because %lu is too old",
2071 cur_dblock - 2080 cur_dblock -
2072 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb)); 2081 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb));
2073 brelse(d_bh); 2082 brelse(d_bh);
2074 return 1; 2083 return 1;
2075 } 2084 }
2076 if (get_desc_mount_id(desc) != newest_mount_id) { 2085 if (get_desc_mount_id(desc) != newest_mount_id) {
2077 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1146: " 2086 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1146: "
2078 "journal_read_trans skipping because %d is != " 2087 "journal_read_trans skipping because %d is != "
2079 "newest_mount_id %lu", get_desc_mount_id(desc), 2088 "newest_mount_id %lu", get_desc_mount_id(desc),
2080 newest_mount_id); 2089 newest_mount_id);
2081 brelse(d_bh); 2090 brelse(d_bh);
2082 return 1; 2091 return 1;
2083 } 2092 }
2084 c_bh = journal_bread(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 2093 c_bh = journal_bread(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
2085 ((trans_offset + get_desc_trans_len(desc) + 1) % 2094 ((trans_offset + get_desc_trans_len(desc) + 1) %
2086 SB_ONDISK_JOURNAL_SIZE(p_s_sb))); 2095 SB_ONDISK_JOURNAL_SIZE(p_s_sb)));
2087 if (!c_bh) { 2096 if (!c_bh) {
2088 brelse(d_bh); 2097 brelse(d_bh);
2089 return 1; 2098 return 1;
2090 } 2099 }
2091 commit = (struct reiserfs_journal_commit *)c_bh->b_data; 2100 commit = (struct reiserfs_journal_commit *)c_bh->b_data;
2092 if (journal_compare_desc_commit(p_s_sb, desc, commit)) { 2101 if (journal_compare_desc_commit(p_s_sb, desc, commit)) {
2093 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2102 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
2094 "journal_read_transaction, " 2103 "journal_read_transaction, "
2095 "commit offset %llu had bad time %d or length %d", 2104 "commit offset %llu had bad time %d or length %d",
2096 c_bh->b_blocknr - 2105 c_bh->b_blocknr -
2097 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), 2106 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb),
2098 get_commit_trans_id(commit), 2107 get_commit_trans_id(commit),
2099 get_commit_trans_len(commit)); 2108 get_commit_trans_len(commit));
2100 brelse(c_bh); 2109 brelse(c_bh);
2101 brelse(d_bh); 2110 brelse(d_bh);
2102 return 1; 2111 return 1;
2103 } 2112 }
2104 trans_id = get_desc_trans_id(desc); 2113 trans_id = get_desc_trans_id(desc);
2105 /* now we know we've got a good transaction, and it was inside the valid time ranges */ 2114 /* now we know we've got a good transaction, and it was inside the valid time ranges */
2106 log_blocks = kmalloc(get_desc_trans_len(desc) * 2115 log_blocks = kmalloc(get_desc_trans_len(desc) *
2107 sizeof(struct buffer_head *), GFP_NOFS); 2116 sizeof(struct buffer_head *), GFP_NOFS);
2108 real_blocks = kmalloc(get_desc_trans_len(desc) * 2117 real_blocks = kmalloc(get_desc_trans_len(desc) *
2109 sizeof(struct buffer_head *), GFP_NOFS); 2118 sizeof(struct buffer_head *), GFP_NOFS);
2110 if (!log_blocks || !real_blocks) { 2119 if (!log_blocks || !real_blocks) {
2111 brelse(c_bh); 2120 brelse(c_bh);
2112 brelse(d_bh); 2121 brelse(d_bh);
2113 kfree(log_blocks); 2122 kfree(log_blocks);
2114 kfree(real_blocks); 2123 kfree(real_blocks);
2115 reiserfs_warning(p_s_sb, 2124 reiserfs_warning(p_s_sb,
2116 "journal-1169: kmalloc failed, unable to mount FS"); 2125 "journal-1169: kmalloc failed, unable to mount FS");
2117 return -1; 2126 return -1;
2118 } 2127 }
2119 /* get all the buffer heads */ 2128 /* get all the buffer heads */
2120 trans_half = journal_trans_half(p_s_sb->s_blocksize); 2129 trans_half = journal_trans_half(p_s_sb->s_blocksize);
2121 for (i = 0; i < get_desc_trans_len(desc); i++) { 2130 for (i = 0; i < get_desc_trans_len(desc); i++) {
2122 log_blocks[i] = 2131 log_blocks[i] =
2123 journal_getblk(p_s_sb, 2132 journal_getblk(p_s_sb,
2124 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 2133 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
2125 (trans_offset + 1 + 2134 (trans_offset + 1 +
2126 i) % SB_ONDISK_JOURNAL_SIZE(p_s_sb)); 2135 i) % SB_ONDISK_JOURNAL_SIZE(p_s_sb));
2127 if (i < trans_half) { 2136 if (i < trans_half) {
2128 real_blocks[i] = 2137 real_blocks[i] =
2129 sb_getblk(p_s_sb, 2138 sb_getblk(p_s_sb,
2130 le32_to_cpu(desc->j_realblock[i])); 2139 le32_to_cpu(desc->j_realblock[i]));
2131 } else { 2140 } else {
2132 real_blocks[i] = 2141 real_blocks[i] =
2133 sb_getblk(p_s_sb, 2142 sb_getblk(p_s_sb,
2134 le32_to_cpu(commit-> 2143 le32_to_cpu(commit->
2135 j_realblock[i - trans_half])); 2144 j_realblock[i - trans_half]));
2136 } 2145 }
2137 if (real_blocks[i]->b_blocknr > SB_BLOCK_COUNT(p_s_sb)) { 2146 if (real_blocks[i]->b_blocknr > SB_BLOCK_COUNT(p_s_sb)) {
2138 reiserfs_warning(p_s_sb, 2147 reiserfs_warning(p_s_sb,
2139 "journal-1207: REPLAY FAILURE fsck required! Block to replay is outside of filesystem"); 2148 "journal-1207: REPLAY FAILURE fsck required! Block to replay is outside of filesystem");
2140 goto abort_replay; 2149 goto abort_replay;
2141 } 2150 }
2142 /* make sure we don't try to replay onto log or reserved area */ 2151 /* make sure we don't try to replay onto log or reserved area */
2143 if (is_block_in_log_or_reserved_area 2152 if (is_block_in_log_or_reserved_area
2144 (p_s_sb, real_blocks[i]->b_blocknr)) { 2153 (p_s_sb, real_blocks[i]->b_blocknr)) {
2145 reiserfs_warning(p_s_sb, 2154 reiserfs_warning(p_s_sb,
2146 "journal-1204: REPLAY FAILURE fsck required! Trying to replay onto a log block"); 2155 "journal-1204: REPLAY FAILURE fsck required! Trying to replay onto a log block");
2147 abort_replay: 2156 abort_replay:
2148 brelse_array(log_blocks, i); 2157 brelse_array(log_blocks, i);
2149 brelse_array(real_blocks, i); 2158 brelse_array(real_blocks, i);
2150 brelse(c_bh); 2159 brelse(c_bh);
2151 brelse(d_bh); 2160 brelse(d_bh);
2152 kfree(log_blocks); 2161 kfree(log_blocks);
2153 kfree(real_blocks); 2162 kfree(real_blocks);
2154 return -1; 2163 return -1;
2155 } 2164 }
2156 } 2165 }
2157 /* read in the log blocks, memcpy to the corresponding real block */ 2166 /* read in the log blocks, memcpy to the corresponding real block */
2158 ll_rw_block(READ, get_desc_trans_len(desc), log_blocks); 2167 ll_rw_block(READ, get_desc_trans_len(desc), log_blocks);
2159 for (i = 0; i < get_desc_trans_len(desc); i++) { 2168 for (i = 0; i < get_desc_trans_len(desc); i++) {
2160 wait_on_buffer(log_blocks[i]); 2169 wait_on_buffer(log_blocks[i]);
2161 if (!buffer_uptodate(log_blocks[i])) { 2170 if (!buffer_uptodate(log_blocks[i])) {
2162 reiserfs_warning(p_s_sb, 2171 reiserfs_warning(p_s_sb,
2163 "journal-1212: REPLAY FAILURE fsck required! buffer write failed"); 2172 "journal-1212: REPLAY FAILURE fsck required! buffer write failed");
2164 brelse_array(log_blocks + i, 2173 brelse_array(log_blocks + i,
2165 get_desc_trans_len(desc) - i); 2174 get_desc_trans_len(desc) - i);
2166 brelse_array(real_blocks, get_desc_trans_len(desc)); 2175 brelse_array(real_blocks, get_desc_trans_len(desc));
2167 brelse(c_bh); 2176 brelse(c_bh);
2168 brelse(d_bh); 2177 brelse(d_bh);
2169 kfree(log_blocks); 2178 kfree(log_blocks);
2170 kfree(real_blocks); 2179 kfree(real_blocks);
2171 return -1; 2180 return -1;
2172 } 2181 }
2173 memcpy(real_blocks[i]->b_data, log_blocks[i]->b_data, 2182 memcpy(real_blocks[i]->b_data, log_blocks[i]->b_data,
2174 real_blocks[i]->b_size); 2183 real_blocks[i]->b_size);
2175 set_buffer_uptodate(real_blocks[i]); 2184 set_buffer_uptodate(real_blocks[i]);
2176 brelse(log_blocks[i]); 2185 brelse(log_blocks[i]);
2177 } 2186 }
2178 /* flush out the real blocks */ 2187 /* flush out the real blocks */
2179 for (i = 0; i < get_desc_trans_len(desc); i++) { 2188 for (i = 0; i < get_desc_trans_len(desc); i++) {
2180 set_buffer_dirty(real_blocks[i]); 2189 set_buffer_dirty(real_blocks[i]);
2181 ll_rw_block(SWRITE, 1, real_blocks + i); 2190 ll_rw_block(SWRITE, 1, real_blocks + i);
2182 } 2191 }
2183 for (i = 0; i < get_desc_trans_len(desc); i++) { 2192 for (i = 0; i < get_desc_trans_len(desc); i++) {
2184 wait_on_buffer(real_blocks[i]); 2193 wait_on_buffer(real_blocks[i]);
2185 if (!buffer_uptodate(real_blocks[i])) { 2194 if (!buffer_uptodate(real_blocks[i])) {
2186 reiserfs_warning(p_s_sb, 2195 reiserfs_warning(p_s_sb,
2187 "journal-1226: REPLAY FAILURE, fsck required! buffer write failed"); 2196 "journal-1226: REPLAY FAILURE, fsck required! buffer write failed");
2188 brelse_array(real_blocks + i, 2197 brelse_array(real_blocks + i,
2189 get_desc_trans_len(desc) - i); 2198 get_desc_trans_len(desc) - i);
2190 brelse(c_bh); 2199 brelse(c_bh);
2191 brelse(d_bh); 2200 brelse(d_bh);
2192 kfree(log_blocks); 2201 kfree(log_blocks);
2193 kfree(real_blocks); 2202 kfree(real_blocks);
2194 return -1; 2203 return -1;
2195 } 2204 }
2196 brelse(real_blocks[i]); 2205 brelse(real_blocks[i]);
2197 } 2206 }
2198 cur_dblock = 2207 cur_dblock =
2199 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 2208 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
2200 ((trans_offset + get_desc_trans_len(desc) + 2209 ((trans_offset + get_desc_trans_len(desc) +
2201 2) % SB_ONDISK_JOURNAL_SIZE(p_s_sb)); 2210 2) % SB_ONDISK_JOURNAL_SIZE(p_s_sb));
2202 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2211 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
2203 "journal-1095: setting journal " "start to offset %ld", 2212 "journal-1095: setting journal " "start to offset %ld",
2204 cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb)); 2213 cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb));
2205 2214
2206 /* init starting values for the first transaction, in case this is the last transaction to be replayed. */ 2215 /* init starting values for the first transaction, in case this is the last transaction to be replayed. */
2207 journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb); 2216 journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb);
2208 journal->j_last_flush_trans_id = trans_id; 2217 journal->j_last_flush_trans_id = trans_id;
2209 journal->j_trans_id = trans_id + 1; 2218 journal->j_trans_id = trans_id + 1;
2210 brelse(c_bh); 2219 brelse(c_bh);
2211 brelse(d_bh); 2220 brelse(d_bh);
2212 kfree(log_blocks); 2221 kfree(log_blocks);
2213 kfree(real_blocks); 2222 kfree(real_blocks);
2214 return 0; 2223 return 0;
2215 } 2224 }
2216 2225
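At its core the replay path above is a copy loop: read every log block, memcpy each one over its home block, then write the home blocks back. A minimal user-space model; plain byte arrays stand in for buffer_heads, and in the kernel the surrounding I/O is driven by ll_rw_block():

    #include <string.h>

    #define BLKSZ 4096 /* illustrative block size */

    static void replay_copy(char log_blocks[][BLKSZ],
                            char real_blocks[][BLKSZ], int trans_len)
    {
            for (int i = 0; i < trans_len; i++) {
                    /* after a crash the journal copy is authoritative */
                    memcpy(real_blocks[i], log_blocks[i], BLKSZ);
            }
            /* the caller would now mark the real blocks dirty, submit
             * the writes, and wait for completion before advancing
             * the journal start */
    }
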
2217 /* This function reads blocks starting from block and to max_block of bufsize 2226 /* This function reads blocks starting from block and to max_block of bufsize
2218 size (but no more than BUFNR blocks at a time). This proved to improve 2227 size (but no more than BUFNR blocks at a time). This proved to improve
2219 mounting speed on self-rebuilding raid5 arrays at least. 2228 mounting speed on self-rebuilding raid5 arrays at least.
2220 Right now it is only used from journal code. But later we might use it 2229 Right now it is only used from journal code. But later we might use it
2221 from other places. 2230 from other places.
2222 Note: Do not use journal_getblk/sb_getblk functions here! */ 2231 Note: Do not use journal_getblk/sb_getblk functions here! */
2223 static struct buffer_head *reiserfs_breada(struct block_device *dev, int block, 2232 static struct buffer_head *reiserfs_breada(struct block_device *dev, int block,
2224 int bufsize, unsigned int max_block) 2233 int bufsize, unsigned int max_block)
2225 { 2234 {
2226 struct buffer_head *bhlist[BUFNR]; 2235 struct buffer_head *bhlist[BUFNR];
2227 unsigned int blocks = BUFNR; 2236 unsigned int blocks = BUFNR;
2228 struct buffer_head *bh; 2237 struct buffer_head *bh;
2229 int i, j; 2238 int i, j;
2230 2239
2231 bh = __getblk(dev, block, bufsize); 2240 bh = __getblk(dev, block, bufsize);
2232 if (buffer_uptodate(bh)) 2241 if (buffer_uptodate(bh))
2233 return (bh); 2242 return (bh);
2234 2243
2235 if (block + BUFNR > max_block) { 2244 if (block + BUFNR > max_block) {
2236 blocks = max_block - block; 2245 blocks = max_block - block;
2237 } 2246 }
2238 bhlist[0] = bh; 2247 bhlist[0] = bh;
2239 j = 1; 2248 j = 1;
2240 for (i = 1; i < blocks; i++) { 2249 for (i = 1; i < blocks; i++) {
2241 bh = __getblk(dev, block + i, bufsize); 2250 bh = __getblk(dev, block + i, bufsize);
2242 if (buffer_uptodate(bh)) { 2251 if (buffer_uptodate(bh)) {
2243 brelse(bh); 2252 brelse(bh);
2244 break; 2253 break;
2245 } else 2254 } else
2246 bhlist[j++] = bh; 2255 bhlist[j++] = bh;
2247 } 2256 }
2248 ll_rw_block(READ, j, bhlist); 2257 ll_rw_block(READ, j, bhlist);
2249 for (i = 1; i < j; i++) 2258 for (i = 1; i < j; i++)
2250 brelse(bhlist[i]); 2259 brelse(bhlist[i]);
2251 bh = bhlist[0]; 2260 bh = bhlist[0];
2252 wait_on_buffer(bh); 2261 wait_on_buffer(bh);
2253 if (buffer_uptodate(bh)) 2262 if (buffer_uptodate(bh))
2254 return bh; 2263 return bh;
2255 brelse(bh); 2264 brelse(bh);
2256 return NULL; 2265 return NULL;
2257 } 2266 }
2258 2267
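reiserfs_breada() batches up to BUFNR contiguous journal blocks into a single read, stopping early at the journal end or at the first block already in cache. A sketch of the same policy; is_cached() and submit_read() are hypothetical stand-ins for buffer_uptodate() and ll_rw_block(), and the BUFNR value is illustrative:

    #include <stdio.h>

    #define BUFNR 11 /* illustrative batch size */

    /* stand-in for buffer_uptodate(): pretend block 8 is already cached */
    static int is_cached(unsigned int block) { return block == 8; }

    static void submit_read(const unsigned int *blocks, int n)
    {
            printf("one read of %d blocks starting at %u\n", n, blocks[0]);
    }

    static void breada(unsigned int block, unsigned int max_block)
    {
            unsigned int batch[BUFNR];
            unsigned int blocks = BUFNR;
            int j = 0;

            if (block + BUFNR > max_block)
                    blocks = max_block - block; /* clamp at the log's end */
            for (unsigned int i = 0; i < blocks; i++) {
                    if (i > 0 && is_cached(block + i))
                            break; /* stop the batch at cached data */
                    batch[j++] = block + i;
            }
            if (j)
                    submit_read(batch, j);
    }

    int main(void)
    {
            breada(4, 8192); /* queues blocks 4..7 in a single read */
            return 0;
    }
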
2259 /* 2268 /*
2260 ** read and replay the log 2269 ** read and replay the log
2261 ** on a clean unmount, the journal header's next unflushed pointer will point to an invalid 2270 ** on a clean unmount, the journal header's next unflushed pointer will point to an invalid
2262 ** transaction. This tests that before finding all the transactions in the log, which makes normal mount times fast. 2271 ** transaction. This tests that before finding all the transactions in the log, which makes normal mount times fast.
2263 ** 2272 **
2264 ** After a crash, this starts with the next unflushed transaction, and replays until it finds one too old, or invalid. 2273 ** After a crash, this starts with the next unflushed transaction, and replays until it finds one too old, or invalid.
2265 ** 2274 **
2266 ** On exit, it sets things up so the first transaction will work correctly. 2275 ** On exit, it sets things up so the first transaction will work correctly.
2267 */ 2276 */
2268 static int journal_read(struct super_block *p_s_sb) 2277 static int journal_read(struct super_block *p_s_sb)
2269 { 2278 {
2270 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 2279 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
2271 struct reiserfs_journal_desc *desc; 2280 struct reiserfs_journal_desc *desc;
2272 unsigned long oldest_trans_id = 0; 2281 unsigned long oldest_trans_id = 0;
2273 unsigned long oldest_invalid_trans_id = 0; 2282 unsigned long oldest_invalid_trans_id = 0;
2274 time_t start; 2283 time_t start;
2275 unsigned long oldest_start = 0; 2284 unsigned long oldest_start = 0;
2276 unsigned long cur_dblock = 0; 2285 unsigned long cur_dblock = 0;
2277 unsigned long newest_mount_id = 9; 2286 unsigned long newest_mount_id = 9;
2278 struct buffer_head *d_bh; 2287 struct buffer_head *d_bh;
2279 struct reiserfs_journal_header *jh; 2288 struct reiserfs_journal_header *jh;
2280 int valid_journal_header = 0; 2289 int valid_journal_header = 0;
2281 int replay_count = 0; 2290 int replay_count = 0;
2282 int continue_replay = 1; 2291 int continue_replay = 1;
2283 int ret; 2292 int ret;
2284 char b[BDEVNAME_SIZE]; 2293 char b[BDEVNAME_SIZE];
2285 2294
2286 cur_dblock = SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb); 2295 cur_dblock = SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb);
2287 reiserfs_info(p_s_sb, "checking transaction log (%s)\n", 2296 reiserfs_info(p_s_sb, "checking transaction log (%s)\n",
2288 bdevname(journal->j_dev_bd, b)); 2297 bdevname(journal->j_dev_bd, b));
2289 start = get_seconds(); 2298 start = get_seconds();
2290 2299
2291 /* step 1, read in the journal header block. Check the transaction it says 2300 /* step 1, read in the journal header block. Check the transaction it says
2292 ** is the first unflushed, and if that transaction is not valid, 2301 ** is the first unflushed, and if that transaction is not valid,
2293 ** replay is done 2302 ** replay is done
2294 */ 2303 */
2295 journal->j_header_bh = journal_bread(p_s_sb, 2304 journal->j_header_bh = journal_bread(p_s_sb,
2296 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) 2305 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb)
2297 + SB_ONDISK_JOURNAL_SIZE(p_s_sb)); 2306 + SB_ONDISK_JOURNAL_SIZE(p_s_sb));
2298 if (!journal->j_header_bh) { 2307 if (!journal->j_header_bh) {
2299 return 1; 2308 return 1;
2300 } 2309 }
2301 jh = (struct reiserfs_journal_header *)(journal->j_header_bh->b_data); 2310 jh = (struct reiserfs_journal_header *)(journal->j_header_bh->b_data);
2302 if (le32_to_cpu(jh->j_first_unflushed_offset) >= 0 && 2311 if (le32_to_cpu(jh->j_first_unflushed_offset) >= 0 &&
2303 le32_to_cpu(jh->j_first_unflushed_offset) < 2312 le32_to_cpu(jh->j_first_unflushed_offset) <
2304 SB_ONDISK_JOURNAL_SIZE(p_s_sb) 2313 SB_ONDISK_JOURNAL_SIZE(p_s_sb)
2305 && le32_to_cpu(jh->j_last_flush_trans_id) > 0) { 2314 && le32_to_cpu(jh->j_last_flush_trans_id) > 0) {
2306 oldest_start = 2315 oldest_start =
2307 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 2316 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
2308 le32_to_cpu(jh->j_first_unflushed_offset); 2317 le32_to_cpu(jh->j_first_unflushed_offset);
2309 oldest_trans_id = le32_to_cpu(jh->j_last_flush_trans_id) + 1; 2318 oldest_trans_id = le32_to_cpu(jh->j_last_flush_trans_id) + 1;
2310 newest_mount_id = le32_to_cpu(jh->j_mount_id); 2319 newest_mount_id = le32_to_cpu(jh->j_mount_id);
2311 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2320 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
2312 "journal-1153: found in " 2321 "journal-1153: found in "
2313 "header: first_unflushed_offset %d, last_flushed_trans_id " 2322 "header: first_unflushed_offset %d, last_flushed_trans_id "
2314 "%lu", le32_to_cpu(jh->j_first_unflushed_offset), 2323 "%lu", le32_to_cpu(jh->j_first_unflushed_offset),
2315 le32_to_cpu(jh->j_last_flush_trans_id)); 2324 le32_to_cpu(jh->j_last_flush_trans_id));
2316 valid_journal_header = 1; 2325 valid_journal_header = 1;
2317 2326
2318 /* now, we try to read the first unflushed offset. If it is not valid, 2327 /* now, we try to read the first unflushed offset. If it is not valid,
2319 ** there is nothing more we can do, and it makes no sense to read 2328 ** there is nothing more we can do, and it makes no sense to read
2320 ** through the whole log. 2329 ** through the whole log.
2321 */ 2330 */
2322 d_bh = 2331 d_bh =
2323 journal_bread(p_s_sb, 2332 journal_bread(p_s_sb,
2324 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 2333 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
2325 le32_to_cpu(jh->j_first_unflushed_offset)); 2334 le32_to_cpu(jh->j_first_unflushed_offset));
2326 ret = journal_transaction_is_valid(p_s_sb, d_bh, NULL, NULL); 2335 ret = journal_transaction_is_valid(p_s_sb, d_bh, NULL, NULL);
2327 if (!ret) { 2336 if (!ret) {
2328 continue_replay = 0; 2337 continue_replay = 0;
2329 } 2338 }
2330 brelse(d_bh); 2339 brelse(d_bh);
2331 goto start_log_replay; 2340 goto start_log_replay;
2332 } 2341 }
2333 2342
2334 if (continue_replay && bdev_read_only(p_s_sb->s_bdev)) { 2343 if (continue_replay && bdev_read_only(p_s_sb->s_bdev)) {
2335 reiserfs_warning(p_s_sb, 2344 reiserfs_warning(p_s_sb,
2336 "clm-2076: device is readonly, unable to replay log"); 2345 "clm-2076: device is readonly, unable to replay log");
2337 return -1; 2346 return -1;
2338 } 2347 }
2339 2348
2340 /* ok, there are transactions that need to be replayed. start with the first log block, find 2349 /* ok, there are transactions that need to be replayed. start with the first log block, find
2341 ** all the valid transactions, and pick out the oldest. 2350 ** all the valid transactions, and pick out the oldest.
2342 */ 2351 */
2343 while (continue_replay 2352 while (continue_replay
2344 && cur_dblock < 2353 && cur_dblock <
2345 (SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 2354 (SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
2346 SB_ONDISK_JOURNAL_SIZE(p_s_sb))) { 2355 SB_ONDISK_JOURNAL_SIZE(p_s_sb))) {
2347 /* Note that the blocksize of the primary fs device and the journal 2356 /* Note that the blocksize of the primary fs device and the journal
2348 device must be the same */ 2357 device must be the same */
2349 d_bh = 2358 d_bh =
2350 reiserfs_breada(journal->j_dev_bd, cur_dblock, 2359 reiserfs_breada(journal->j_dev_bd, cur_dblock,
2351 p_s_sb->s_blocksize, 2360 p_s_sb->s_blocksize,
2352 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 2361 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
2353 SB_ONDISK_JOURNAL_SIZE(p_s_sb)); 2362 SB_ONDISK_JOURNAL_SIZE(p_s_sb));
2354 ret = 2363 ret =
2355 journal_transaction_is_valid(p_s_sb, d_bh, 2364 journal_transaction_is_valid(p_s_sb, d_bh,
2356 &oldest_invalid_trans_id, 2365 &oldest_invalid_trans_id,
2357 &newest_mount_id); 2366 &newest_mount_id);
2358 if (ret == 1) { 2367 if (ret == 1) {
2359 desc = (struct reiserfs_journal_desc *)d_bh->b_data; 2368 desc = (struct reiserfs_journal_desc *)d_bh->b_data;
2360 if (oldest_start == 0) { /* init all oldest_ values */ 2369 if (oldest_start == 0) { /* init all oldest_ values */
2361 oldest_trans_id = get_desc_trans_id(desc); 2370 oldest_trans_id = get_desc_trans_id(desc);
2362 oldest_start = d_bh->b_blocknr; 2371 oldest_start = d_bh->b_blocknr;
2363 newest_mount_id = get_desc_mount_id(desc); 2372 newest_mount_id = get_desc_mount_id(desc);
2364 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2373 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
2365 "journal-1179: Setting " 2374 "journal-1179: Setting "
2366 "oldest_start to offset %llu, trans_id %lu", 2375 "oldest_start to offset %llu, trans_id %lu",
2367 oldest_start - 2376 oldest_start -
2368 SB_ONDISK_JOURNAL_1st_BLOCK 2377 SB_ONDISK_JOURNAL_1st_BLOCK
2369 (p_s_sb), oldest_trans_id); 2378 (p_s_sb), oldest_trans_id);
2370 } else if (oldest_trans_id > get_desc_trans_id(desc)) { 2379 } else if (oldest_trans_id > get_desc_trans_id(desc)) {
2371 /* one we just read was older */ 2380 /* one we just read was older */
2372 oldest_trans_id = get_desc_trans_id(desc); 2381 oldest_trans_id = get_desc_trans_id(desc);
2373 oldest_start = d_bh->b_blocknr; 2382 oldest_start = d_bh->b_blocknr;
2374 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2383 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
2375 "journal-1180: Resetting " 2384 "journal-1180: Resetting "
2376 "oldest_start to offset %lu, trans_id %lu", 2385 "oldest_start to offset %lu, trans_id %lu",
2377 oldest_start - 2386 oldest_start -
2378 SB_ONDISK_JOURNAL_1st_BLOCK 2387 SB_ONDISK_JOURNAL_1st_BLOCK
2379 (p_s_sb), oldest_trans_id); 2388 (p_s_sb), oldest_trans_id);
2380 } 2389 }
2381 if (newest_mount_id < get_desc_mount_id(desc)) { 2390 if (newest_mount_id < get_desc_mount_id(desc)) {
2382 newest_mount_id = get_desc_mount_id(desc); 2391 newest_mount_id = get_desc_mount_id(desc);
2383 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2392 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
2384 "journal-1299: Setting " 2393 "journal-1299: Setting "
2385 "newest_mount_id to %d", 2394 "newest_mount_id to %d",
2386 get_desc_mount_id(desc)); 2395 get_desc_mount_id(desc));
2387 } 2396 }
2388 cur_dblock += get_desc_trans_len(desc) + 2; 2397 cur_dblock += get_desc_trans_len(desc) + 2;
2389 } else { 2398 } else {
2390 cur_dblock++; 2399 cur_dblock++;
2391 } 2400 }
2392 brelse(d_bh); 2401 brelse(d_bh);
2393 } 2402 }
2394 2403
2395 start_log_replay: 2404 start_log_replay:
2396 cur_dblock = oldest_start; 2405 cur_dblock = oldest_start;
2397 if (oldest_trans_id) { 2406 if (oldest_trans_id) {
2398 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2407 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
2399 "journal-1206: Starting replay " 2408 "journal-1206: Starting replay "
2400 "from offset %llu, trans_id %lu", 2409 "from offset %llu, trans_id %lu",
2401 cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), 2410 cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb),
2402 oldest_trans_id); 2411 oldest_trans_id);
2403 2412
2404 } 2413 }
2405 replay_count = 0; 2414 replay_count = 0;
2406 while (continue_replay && oldest_trans_id > 0) { 2415 while (continue_replay && oldest_trans_id > 0) {
2407 ret = 2416 ret =
2408 journal_read_transaction(p_s_sb, cur_dblock, oldest_start, 2417 journal_read_transaction(p_s_sb, cur_dblock, oldest_start,
2409 oldest_trans_id, newest_mount_id); 2418 oldest_trans_id, newest_mount_id);
2410 if (ret < 0) { 2419 if (ret < 0) {
2411 return ret; 2420 return ret;
2412 } else if (ret != 0) { 2421 } else if (ret != 0) {
2413 break; 2422 break;
2414 } 2423 }
2415 cur_dblock = 2424 cur_dblock =
2416 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + journal->j_start; 2425 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + journal->j_start;
2417 replay_count++; 2426 replay_count++;
2418 if (cur_dblock == oldest_start) 2427 if (cur_dblock == oldest_start)
2419 break; 2428 break;
2420 } 2429 }
2421 2430
2422 if (oldest_trans_id == 0) { 2431 if (oldest_trans_id == 0) {
2423 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2432 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
2424 "journal-1225: No valid " "transactions found"); 2433 "journal-1225: No valid " "transactions found");
2425 } 2434 }
2426 /* j_start does not get set correctly if we don't replay any transactions. 2435 /* j_start does not get set correctly if we don't replay any transactions.
2427 ** If we had a valid journal header, set j_start to the first unflushed transaction value 2436 ** If we had a valid journal header, set j_start to the first unflushed transaction value
2428 ** and copy the trans_id from the header. 2437 ** and copy the trans_id from the header.
2429 */ 2438 */
2430 if (valid_journal_header && replay_count == 0) { 2439 if (valid_journal_header && replay_count == 0) {
2431 journal->j_start = le32_to_cpu(jh->j_first_unflushed_offset); 2440 journal->j_start = le32_to_cpu(jh->j_first_unflushed_offset);
2432 journal->j_trans_id = 2441 journal->j_trans_id =
2433 le32_to_cpu(jh->j_last_flush_trans_id) + 1; 2442 le32_to_cpu(jh->j_last_flush_trans_id) + 1;
2434 journal->j_last_flush_trans_id = 2443 journal->j_last_flush_trans_id =
2435 le32_to_cpu(jh->j_last_flush_trans_id); 2444 le32_to_cpu(jh->j_last_flush_trans_id);
2436 journal->j_mount_id = le32_to_cpu(jh->j_mount_id) + 1; 2445 journal->j_mount_id = le32_to_cpu(jh->j_mount_id) + 1;
2437 } else { 2446 } else {
2438 journal->j_mount_id = newest_mount_id + 1; 2447 journal->j_mount_id = newest_mount_id + 1;
2439 } 2448 }
2440 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1299: Setting " 2449 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1299: Setting "
2441 "newest_mount_id to %lu", journal->j_mount_id); 2450 "newest_mount_id to %lu", journal->j_mount_id);
2442 journal->j_first_unflushed_offset = journal->j_start; 2451 journal->j_first_unflushed_offset = journal->j_start;
2443 if (replay_count > 0) { 2452 if (replay_count > 0) {
2444 reiserfs_info(p_s_sb, 2453 reiserfs_info(p_s_sb,
2445 "replayed %d transactions in %lu seconds\n", 2454 "replayed %d transactions in %lu seconds\n",
2446 replay_count, get_seconds() - start); 2455 replay_count, get_seconds() - start);
2447 } 2456 }
2448 if (!bdev_read_only(p_s_sb->s_bdev) && 2457 if (!bdev_read_only(p_s_sb->s_bdev) &&
2449 _update_journal_header_block(p_s_sb, journal->j_start, 2458 _update_journal_header_block(p_s_sb, journal->j_start,
2450 journal->j_last_flush_trans_id)) { 2459 journal->j_last_flush_trans_id)) {
2451 /* replay failed, caller must call free_journal_ram and abort 2460 /* replay failed, caller must call free_journal_ram and abort
2452 ** the mount 2461 ** the mount
2453 */ 2462 */
2454 return -1; 2463 return -1;
2455 } 2464 }
2456 return 0; 2465 return 0;
2457 } 2466 }
2458 2467
2459 static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s) 2468 static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s)
2460 { 2469 {
2461 struct reiserfs_journal_list *jl; 2470 struct reiserfs_journal_list *jl;
2462 jl = kzalloc(sizeof(struct reiserfs_journal_list), 2471 jl = kzalloc(sizeof(struct reiserfs_journal_list),
2463 GFP_NOFS | __GFP_NOFAIL); 2472 GFP_NOFS | __GFP_NOFAIL);
2464 INIT_LIST_HEAD(&jl->j_list); 2473 INIT_LIST_HEAD(&jl->j_list);
2465 INIT_LIST_HEAD(&jl->j_working_list); 2474 INIT_LIST_HEAD(&jl->j_working_list);
2466 INIT_LIST_HEAD(&jl->j_tail_bh_list); 2475 INIT_LIST_HEAD(&jl->j_tail_bh_list);
2467 INIT_LIST_HEAD(&jl->j_bh_list); 2476 INIT_LIST_HEAD(&jl->j_bh_list);
2468 sema_init(&jl->j_commit_lock, 1); 2477 sema_init(&jl->j_commit_lock, 1);
2469 SB_JOURNAL(s)->j_num_lists++; 2478 SB_JOURNAL(s)->j_num_lists++;
2470 get_journal_list(jl); 2479 get_journal_list(jl);
2471 return jl; 2480 return jl;
2472 } 2481 }
2473 2482
2474 static void journal_list_init(struct super_block *p_s_sb) 2483 static void journal_list_init(struct super_block *p_s_sb)
2475 { 2484 {
2476 SB_JOURNAL(p_s_sb)->j_current_jl = alloc_journal_list(p_s_sb); 2485 SB_JOURNAL(p_s_sb)->j_current_jl = alloc_journal_list(p_s_sb);
2477 } 2486 }
2478 2487
2479 static int release_journal_dev(struct super_block *super, 2488 static int release_journal_dev(struct super_block *super,
2480 struct reiserfs_journal *journal) 2489 struct reiserfs_journal *journal)
2481 { 2490 {
2482 int result; 2491 int result;
2483 2492
2484 result = 0; 2493 result = 0;
2485 2494
2486 if (journal->j_dev_file != NULL) { 2495 if (journal->j_dev_file != NULL) {
2487 result = filp_close(journal->j_dev_file, NULL); 2496 result = filp_close(journal->j_dev_file, NULL);
2488 journal->j_dev_file = NULL; 2497 journal->j_dev_file = NULL;
2489 journal->j_dev_bd = NULL; 2498 journal->j_dev_bd = NULL;
2490 } else if (journal->j_dev_bd != NULL) { 2499 } else if (journal->j_dev_bd != NULL) {
2491 result = blkdev_put(journal->j_dev_bd); 2500 result = blkdev_put(journal->j_dev_bd);
2492 journal->j_dev_bd = NULL; 2501 journal->j_dev_bd = NULL;
2493 } 2502 }
2494 2503
2495 if (result != 0) { 2504 if (result != 0) {
2496 reiserfs_warning(super, 2505 reiserfs_warning(super,
2497 "sh-457: release_journal_dev: Cannot release journal device: %i", 2506 "sh-457: release_journal_dev: Cannot release journal device: %i",
2498 result); 2507 result);
2499 } 2508 }
2500 return result; 2509 return result;
2501 } 2510 }
2502 2511
2503 static int journal_init_dev(struct super_block *super, 2512 static int journal_init_dev(struct super_block *super,
2504 struct reiserfs_journal *journal, 2513 struct reiserfs_journal *journal,
2505 const char *jdev_name) 2514 const char *jdev_name)
2506 { 2515 {
2507 int result; 2516 int result;
2508 dev_t jdev; 2517 dev_t jdev;
2509 int blkdev_mode = FMODE_READ | FMODE_WRITE; 2518 int blkdev_mode = FMODE_READ | FMODE_WRITE;
2510 char b[BDEVNAME_SIZE]; 2519 char b[BDEVNAME_SIZE];
2511 2520
2512 result = 0; 2521 result = 0;
2513 2522
2514 journal->j_dev_bd = NULL; 2523 journal->j_dev_bd = NULL;
2515 journal->j_dev_file = NULL; 2524 journal->j_dev_file = NULL;
2516 jdev = SB_ONDISK_JOURNAL_DEVICE(super) ? 2525 jdev = SB_ONDISK_JOURNAL_DEVICE(super) ?
2517 new_decode_dev(SB_ONDISK_JOURNAL_DEVICE(super)) : super->s_dev; 2526 new_decode_dev(SB_ONDISK_JOURNAL_DEVICE(super)) : super->s_dev;
2518 2527
2519 if (bdev_read_only(super->s_bdev)) 2528 if (bdev_read_only(super->s_bdev))
2520 blkdev_mode = FMODE_READ; 2529 blkdev_mode = FMODE_READ;
2521 2530
2522 /* no "jdev" option was given; open the journal device recorded in the super block (it may be the main device) */ 2531 /* no "jdev" option was given; open the journal device recorded in the super block (it may be the main device) */
2523 if ((!jdev_name || !jdev_name[0])) { 2532 if ((!jdev_name || !jdev_name[0])) {
2524 journal->j_dev_bd = open_by_devnum(jdev, blkdev_mode); 2533 journal->j_dev_bd = open_by_devnum(jdev, blkdev_mode);
2525 if (IS_ERR(journal->j_dev_bd)) { 2534 if (IS_ERR(journal->j_dev_bd)) {
2526 result = PTR_ERR(journal->j_dev_bd); 2535 result = PTR_ERR(journal->j_dev_bd);
2527 journal->j_dev_bd = NULL; 2536 journal->j_dev_bd = NULL;
2528 reiserfs_warning(super, "sh-458: journal_init_dev: " 2537 reiserfs_warning(super, "sh-458: journal_init_dev: "
2529 "cannot init journal device '%s': %i", 2538 "cannot init journal device '%s': %i",
2530 __bdevname(jdev, b), result); 2539 __bdevname(jdev, b), result);
2531 return result; 2540 return result;
2532 } else if (jdev != super->s_dev) 2541 } else if (jdev != super->s_dev)
2533 set_blocksize(journal->j_dev_bd, super->s_blocksize); 2542 set_blocksize(journal->j_dev_bd, super->s_blocksize);
2534 return 0; 2543 return 0;
2535 } 2544 }
2536 2545
2537 journal->j_dev_file = filp_open(jdev_name, 0, 0); 2546 journal->j_dev_file = filp_open(jdev_name, 0, 0);
2538 if (!IS_ERR(journal->j_dev_file)) { 2547 if (!IS_ERR(journal->j_dev_file)) {
2539 struct inode *jdev_inode = journal->j_dev_file->f_mapping->host; 2548 struct inode *jdev_inode = journal->j_dev_file->f_mapping->host;
2540 if (!S_ISBLK(jdev_inode->i_mode)) { 2549 if (!S_ISBLK(jdev_inode->i_mode)) {
2541 reiserfs_warning(super, "journal_init_dev: '%s' is " 2550 reiserfs_warning(super, "journal_init_dev: '%s' is "
2542 "not a block device", jdev_name); 2551 "not a block device", jdev_name);
2543 result = -ENOTBLK; 2552 result = -ENOTBLK;
2544 release_journal_dev(super, journal); 2553 release_journal_dev(super, journal);
2545 } else { 2554 } else {
2546 /* ok */ 2555 /* ok */
2547 journal->j_dev_bd = I_BDEV(jdev_inode); 2556 journal->j_dev_bd = I_BDEV(jdev_inode);
2548 set_blocksize(journal->j_dev_bd, super->s_blocksize); 2557 set_blocksize(journal->j_dev_bd, super->s_blocksize);
2549 reiserfs_info(super, 2558 reiserfs_info(super,
2550 "journal_init_dev: journal device: %s\n", 2559 "journal_init_dev: journal device: %s\n",
2551 bdevname(journal->j_dev_bd, b)); 2560 bdevname(journal->j_dev_bd, b));
2552 } 2561 }
2553 } else { 2562 } else {
2554 result = PTR_ERR(journal->j_dev_file); 2563 result = PTR_ERR(journal->j_dev_file);
2555 journal->j_dev_file = NULL; 2564 journal->j_dev_file = NULL;
2556 reiserfs_warning(super, 2565 reiserfs_warning(super,
2557 "journal_init_dev: Cannot open '%s': %i", 2566 "journal_init_dev: Cannot open '%s': %i",
2558 jdev_name, result); 2567 jdev_name, result);
2559 } 2568 }
2560 return result; 2569 return result;
2561 } 2570 }
2562 2571
2563 /* 2572 /*
2564 ** must be called once on fs mount. calls journal_read for you 2573 ** must be called once on fs mount. calls journal_read for you
2565 */ 2574 */
2566 int journal_init(struct super_block *p_s_sb, const char *j_dev_name, 2575 int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
2567 int old_format, unsigned int commit_max_age) 2576 int old_format, unsigned int commit_max_age)
2568 { 2577 {
2569 int num_cnodes = SB_ONDISK_JOURNAL_SIZE(p_s_sb) * 2; 2578 int num_cnodes = SB_ONDISK_JOURNAL_SIZE(p_s_sb) * 2;
2570 struct buffer_head *bhjh; 2579 struct buffer_head *bhjh;
2571 struct reiserfs_super_block *rs; 2580 struct reiserfs_super_block *rs;
2572 struct reiserfs_journal_header *jh; 2581 struct reiserfs_journal_header *jh;
2573 struct reiserfs_journal *journal; 2582 struct reiserfs_journal *journal;
2574 struct reiserfs_journal_list *jl; 2583 struct reiserfs_journal_list *jl;
2575 char b[BDEVNAME_SIZE]; 2584 char b[BDEVNAME_SIZE];
2576 2585
2577 journal = SB_JOURNAL(p_s_sb) = vmalloc(sizeof(struct reiserfs_journal)); 2586 journal = SB_JOURNAL(p_s_sb) = vmalloc(sizeof(struct reiserfs_journal));
2578 if (!journal) { 2587 if (!journal) {
2579 reiserfs_warning(p_s_sb, 2588 reiserfs_warning(p_s_sb,
2580 "journal-1256: unable to get memory for journal structure"); 2589 "journal-1256: unable to get memory for journal structure");
2581 return 1; 2590 return 1;
2582 } 2591 }
2583 memset(journal, 0, sizeof(struct reiserfs_journal)); 2592 memset(journal, 0, sizeof(struct reiserfs_journal));
2584 INIT_LIST_HEAD(&journal->j_bitmap_nodes); 2593 INIT_LIST_HEAD(&journal->j_bitmap_nodes);
2585 INIT_LIST_HEAD(&journal->j_prealloc_list); 2594 INIT_LIST_HEAD(&journal->j_prealloc_list);
2586 INIT_LIST_HEAD(&journal->j_working_list); 2595 INIT_LIST_HEAD(&journal->j_working_list);
2587 INIT_LIST_HEAD(&journal->j_journal_list); 2596 INIT_LIST_HEAD(&journal->j_journal_list);
2588 journal->j_persistent_trans = 0; 2597 journal->j_persistent_trans = 0;
2589 if (reiserfs_allocate_list_bitmaps(p_s_sb, 2598 if (reiserfs_allocate_list_bitmaps(p_s_sb,
2590 journal->j_list_bitmap, 2599 journal->j_list_bitmap,
2591 SB_BMAP_NR(p_s_sb))) 2600 SB_BMAP_NR(p_s_sb)))
2592 goto free_and_return; 2601 goto free_and_return;
2593 allocate_bitmap_nodes(p_s_sb); 2602 allocate_bitmap_nodes(p_s_sb);
2594 2603
2595 /* reserved for journal area support */ 2604 /* reserved for journal area support */
2596 SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb) = (old_format ? 2605 SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb) = (old_format ?
2597 REISERFS_OLD_DISK_OFFSET_IN_BYTES 2606 REISERFS_OLD_DISK_OFFSET_IN_BYTES
2598 / p_s_sb->s_blocksize + 2607 / p_s_sb->s_blocksize +
2599 SB_BMAP_NR(p_s_sb) + 2608 SB_BMAP_NR(p_s_sb) +
2600 1 : 2609 1 :
2601 REISERFS_DISK_OFFSET_IN_BYTES / 2610 REISERFS_DISK_OFFSET_IN_BYTES /
2602 p_s_sb->s_blocksize + 2); 2611 p_s_sb->s_blocksize + 2);
2603 2612
2604 /* Sanity check: make sure the standard journal fits within the area 2613 /* Sanity check: make sure the standard journal fits within the area
2605 addressed by the first bitmap block (relevant for small blocksizes) */ 2614 addressed by the first bitmap block (relevant for small blocksizes) */
2606 if (!SB_ONDISK_JOURNAL_DEVICE(p_s_sb) && 2615 if (!SB_ONDISK_JOURNAL_DEVICE(p_s_sb) &&
2607 (SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb) + 2616 (SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb) +
2608 SB_ONDISK_JOURNAL_SIZE(p_s_sb) > p_s_sb->s_blocksize * 8)) { 2617 SB_ONDISK_JOURNAL_SIZE(p_s_sb) > p_s_sb->s_blocksize * 8)) {
2609 reiserfs_warning(p_s_sb, 2618 reiserfs_warning(p_s_sb,
2610 "journal-1393: journal does not fit into the area " 2619 "journal-1393: journal does not fit into the area "
2611 "addressed by the first bitmap block. It starts at " 2620 "addressed by the first bitmap block. It starts at "
2612 "%u and its size is %u. Block size %ld", 2621 "%u and its size is %u. Block size %ld",
2613 SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb), 2622 SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb),
2614 SB_ONDISK_JOURNAL_SIZE(p_s_sb), 2623 SB_ONDISK_JOURNAL_SIZE(p_s_sb),
2615 p_s_sb->s_blocksize); 2624 p_s_sb->s_blocksize);
2616 goto free_and_return; 2625 goto free_and_return;
2617 } 2626 }
2618 2627
2619 if (journal_init_dev(p_s_sb, journal, j_dev_name) != 0) { 2628 if (journal_init_dev(p_s_sb, journal, j_dev_name) != 0) {
2620 reiserfs_warning(p_s_sb, 2629 reiserfs_warning(p_s_sb,
2621 "sh-462: unable to initialize journal device"); 2630 "sh-462: unable to initialize journal device");
2622 goto free_and_return; 2631 goto free_and_return;
2623 } 2632 }
2624 2633
2625 rs = SB_DISK_SUPER_BLOCK(p_s_sb); 2634 rs = SB_DISK_SUPER_BLOCK(p_s_sb);
2626 2635
2627 /* read journal header */ 2636 /* read journal header */
2628 bhjh = journal_bread(p_s_sb, 2637 bhjh = journal_bread(p_s_sb,
2629 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 2638 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
2630 SB_ONDISK_JOURNAL_SIZE(p_s_sb)); 2639 SB_ONDISK_JOURNAL_SIZE(p_s_sb));
2631 if (!bhjh) { 2640 if (!bhjh) {
2632 reiserfs_warning(p_s_sb, 2641 reiserfs_warning(p_s_sb,
2633 "sh-459: unable to read journal header"); 2642 "sh-459: unable to read journal header");
2634 goto free_and_return; 2643 goto free_and_return;
2635 } 2644 }
2636 jh = (struct reiserfs_journal_header *)(bhjh->b_data); 2645 jh = (struct reiserfs_journal_header *)(bhjh->b_data);
2637 2646
2638 /* make sure that the journal matches the super block */ 2647 /* make sure that the journal matches the super block */
2639 if (is_reiserfs_jr(rs) 2648 if (is_reiserfs_jr(rs)
2640 && (le32_to_cpu(jh->jh_journal.jp_journal_magic) != 2649 && (le32_to_cpu(jh->jh_journal.jp_journal_magic) !=
2641 sb_jp_journal_magic(rs))) { 2650 sb_jp_journal_magic(rs))) {
2642 reiserfs_warning(p_s_sb, 2651 reiserfs_warning(p_s_sb,
2643 "sh-460: journal header magic %x " 2652 "sh-460: journal header magic %x "
2644 "(device %s) does not match the magic found in the super " 2653 "(device %s) does not match the magic found in the super "
2645 "block %x", jh->jh_journal.jp_journal_magic, 2654 "block %x", jh->jh_journal.jp_journal_magic,
2646 bdevname(journal->j_dev_bd, b), 2655 bdevname(journal->j_dev_bd, b),
2647 sb_jp_journal_magic(rs)); 2656 sb_jp_journal_magic(rs));
2648 brelse(bhjh); 2657 brelse(bhjh);
2649 goto free_and_return; 2658 goto free_and_return;
2650 } 2659 }
2651 2660
2652 journal->j_trans_max = le32_to_cpu(jh->jh_journal.jp_journal_trans_max); 2661 journal->j_trans_max = le32_to_cpu(jh->jh_journal.jp_journal_trans_max);
2653 journal->j_max_batch = le32_to_cpu(jh->jh_journal.jp_journal_max_batch); 2662 journal->j_max_batch = le32_to_cpu(jh->jh_journal.jp_journal_max_batch);
2654 journal->j_max_commit_age = 2663 journal->j_max_commit_age =
2655 le32_to_cpu(jh->jh_journal.jp_journal_max_commit_age); 2664 le32_to_cpu(jh->jh_journal.jp_journal_max_commit_age);
2656 journal->j_max_trans_age = JOURNAL_MAX_TRANS_AGE; 2665 journal->j_max_trans_age = JOURNAL_MAX_TRANS_AGE;
2657 2666
2658 if (journal->j_trans_max) { 2667 if (journal->j_trans_max) {
2659 /* make sure these parameters are available, assign them if they are not */ 2668 /* make sure these parameters are available, assign them if they are not */
2660 __u32 initial = journal->j_trans_max; 2669 __u32 initial = journal->j_trans_max;
2661 __u32 ratio = 1; 2670 __u32 ratio = 1;
2662 2671
2663 if (p_s_sb->s_blocksize < 4096) 2672 if (p_s_sb->s_blocksize < 4096)
2664 ratio = 4096 / p_s_sb->s_blocksize; 2673 ratio = 4096 / p_s_sb->s_blocksize;
2665 2674
2666 if (SB_ONDISK_JOURNAL_SIZE(p_s_sb) / journal->j_trans_max < 2675 if (SB_ONDISK_JOURNAL_SIZE(p_s_sb) / journal->j_trans_max <
2667 JOURNAL_MIN_RATIO) 2676 JOURNAL_MIN_RATIO)
2668 journal->j_trans_max = 2677 journal->j_trans_max =
2669 SB_ONDISK_JOURNAL_SIZE(p_s_sb) / JOURNAL_MIN_RATIO; 2678 SB_ONDISK_JOURNAL_SIZE(p_s_sb) / JOURNAL_MIN_RATIO;
2670 if (journal->j_trans_max > JOURNAL_TRANS_MAX_DEFAULT / ratio) 2679 if (journal->j_trans_max > JOURNAL_TRANS_MAX_DEFAULT / ratio)
2671 journal->j_trans_max = 2680 journal->j_trans_max =
2672 JOURNAL_TRANS_MAX_DEFAULT / ratio; 2681 JOURNAL_TRANS_MAX_DEFAULT / ratio;
2673 if (journal->j_trans_max < JOURNAL_TRANS_MIN_DEFAULT / ratio) 2682 if (journal->j_trans_max < JOURNAL_TRANS_MIN_DEFAULT / ratio)
2674 journal->j_trans_max = 2683 journal->j_trans_max =
2675 JOURNAL_TRANS_MIN_DEFAULT / ratio; 2684 JOURNAL_TRANS_MIN_DEFAULT / ratio;
2676 2685
2677 if (journal->j_trans_max != initial) 2686 if (journal->j_trans_max != initial)
2678 reiserfs_warning(p_s_sb, 2687 reiserfs_warning(p_s_sb,
2679 "sh-461: journal_init: wrong transaction max size (%u). Changed to %u", 2688 "sh-461: journal_init: wrong transaction max size (%u). Changed to %u",
2680 initial, journal->j_trans_max); 2689 initial, journal->j_trans_max);
2681 2690
2682 journal->j_max_batch = journal->j_trans_max * 2691 journal->j_max_batch = journal->j_trans_max *
2683 JOURNAL_MAX_BATCH_DEFAULT / JOURNAL_TRANS_MAX_DEFAULT; 2692 JOURNAL_MAX_BATCH_DEFAULT / JOURNAL_TRANS_MAX_DEFAULT;
2684 } 2693 }
2685 2694
2686 if (!journal->j_trans_max) { 2695 if (!journal->j_trans_max) {
2687 /* the filesystem was created by an old version of mkreiserfs, 2696 /* the filesystem was created by an old version of mkreiserfs,
2688 so this field contains a zero value */ 2697 so this field contains a zero value */
2689 journal->j_trans_max = JOURNAL_TRANS_MAX_DEFAULT; 2698 journal->j_trans_max = JOURNAL_TRANS_MAX_DEFAULT;
2690 journal->j_max_batch = JOURNAL_MAX_BATCH_DEFAULT; 2699 journal->j_max_batch = JOURNAL_MAX_BATCH_DEFAULT;
2691 journal->j_max_commit_age = JOURNAL_MAX_COMMIT_AGE; 2700 journal->j_max_commit_age = JOURNAL_MAX_COMMIT_AGE;
2692 2701
2693 /* for blocksize >= 4096 the max transaction size is 1024. For blocksize < 4096 2702 /* for blocksize >= 4096 the max transaction size is 1024. For blocksize < 4096
2694 the max transaction size is decreased proportionally */ 2703 the max transaction size is decreased proportionally */
2695 if (p_s_sb->s_blocksize < 4096) { 2704 if (p_s_sb->s_blocksize < 4096) {
2696 journal->j_trans_max /= (4096 / p_s_sb->s_blocksize); 2705 journal->j_trans_max /= (4096 / p_s_sb->s_blocksize);
2697 journal->j_max_batch = (journal->j_trans_max) * 9 / 10; 2706 journal->j_max_batch = (journal->j_trans_max) * 9 / 10;
2698 } 2707 }
2699 } 2708 }
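/*
 * Editor's worked example (assuming the usual defaults from
 * reiserfs_fs.h, JOURNAL_TRANS_MAX_DEFAULT == 1024 and
 * JOURNAL_MAX_BATCH_DEFAULT == 900): with a 1KB blocksize the divisor
 * is 4096 / 1024 == 4, so j_trans_max becomes 1024 / 4 == 256 and
 * j_max_batch becomes 256 * 9 / 10 == 230. A 4KB or larger blocksize
 * keeps the full 1024/900 defaults.
 */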
2700 2709
2701 journal->j_default_max_commit_age = journal->j_max_commit_age; 2710 journal->j_default_max_commit_age = journal->j_max_commit_age;
2702 2711
2703 if (commit_max_age != 0) { 2712 if (commit_max_age != 0) {
2704 journal->j_max_commit_age = commit_max_age; 2713 journal->j_max_commit_age = commit_max_age;
2705 journal->j_max_trans_age = commit_max_age; 2714 journal->j_max_trans_age = commit_max_age;
2706 } 2715 }
2707 2716
2708 reiserfs_info(p_s_sb, "journal params: device %s, size %u, " 2717 reiserfs_info(p_s_sb, "journal params: device %s, size %u, "
2709 "journal first block %u, max trans len %u, max batch %u, " 2718 "journal first block %u, max trans len %u, max batch %u, "
2710 "max commit age %u, max trans age %u\n", 2719 "max commit age %u, max trans age %u\n",
2711 bdevname(journal->j_dev_bd, b), 2720 bdevname(journal->j_dev_bd, b),
2712 SB_ONDISK_JOURNAL_SIZE(p_s_sb), 2721 SB_ONDISK_JOURNAL_SIZE(p_s_sb),
2713 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), 2722 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb),
2714 journal->j_trans_max, 2723 journal->j_trans_max,
2715 journal->j_max_batch, 2724 journal->j_max_batch,
2716 journal->j_max_commit_age, journal->j_max_trans_age); 2725 journal->j_max_commit_age, journal->j_max_trans_age);
2717 2726
2718 brelse(bhjh); 2727 brelse(bhjh);
2719 2728
2720 journal->j_list_bitmap_index = 0; 2729 journal->j_list_bitmap_index = 0;
2721 journal_list_init(p_s_sb); 2730 journal_list_init(p_s_sb);
2722 2731
2723 memset(journal->j_list_hash_table, 0, 2732 memset(journal->j_list_hash_table, 0,
2724 JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *)); 2733 JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *));
2725 2734
2726 INIT_LIST_HEAD(&journal->j_dirty_buffers); 2735 INIT_LIST_HEAD(&journal->j_dirty_buffers);
2727 spin_lock_init(&journal->j_dirty_buffers_lock); 2736 spin_lock_init(&journal->j_dirty_buffers_lock);
2728 2737
2729 journal->j_start = 0; 2738 journal->j_start = 0;
2730 journal->j_len = 0; 2739 journal->j_len = 0;
2731 journal->j_len_alloc = 0; 2740 journal->j_len_alloc = 0;
2732 atomic_set(&(journal->j_wcount), 0); 2741 atomic_set(&(journal->j_wcount), 0);
2733 atomic_set(&(journal->j_async_throttle), 0); 2742 atomic_set(&(journal->j_async_throttle), 0);
2734 journal->j_bcount = 0; 2743 journal->j_bcount = 0;
2735 journal->j_trans_start_time = 0; 2744 journal->j_trans_start_time = 0;
2736 journal->j_last = NULL; 2745 journal->j_last = NULL;
2737 journal->j_first = NULL; 2746 journal->j_first = NULL;
2738 init_waitqueue_head(&(journal->j_join_wait)); 2747 init_waitqueue_head(&(journal->j_join_wait));
2739 sema_init(&journal->j_lock, 1); 2748 sema_init(&journal->j_lock, 1);
2740 sema_init(&journal->j_flush_sem, 1); 2749 sema_init(&journal->j_flush_sem, 1);
2741 2750
2742 journal->j_trans_id = 10; 2751 journal->j_trans_id = 10;
2743 journal->j_mount_id = 10; 2752 journal->j_mount_id = 10;
2744 journal->j_state = 0; 2753 journal->j_state = 0;
2745 atomic_set(&(journal->j_jlock), 0); 2754 atomic_set(&(journal->j_jlock), 0);
2746 journal->j_cnode_free_list = allocate_cnodes(num_cnodes); 2755 journal->j_cnode_free_list = allocate_cnodes(num_cnodes);
2747 journal->j_cnode_free_orig = journal->j_cnode_free_list; 2756 journal->j_cnode_free_orig = journal->j_cnode_free_list;
2748 journal->j_cnode_free = journal->j_cnode_free_list ? num_cnodes : 0; 2757 journal->j_cnode_free = journal->j_cnode_free_list ? num_cnodes : 0;
2749 journal->j_cnode_used = 0; 2758 journal->j_cnode_used = 0;
2750 journal->j_must_wait = 0; 2759 journal->j_must_wait = 0;
2751 2760
2752 if (journal->j_cnode_free == 0) { 2761 if (journal->j_cnode_free == 0) {
2753 reiserfs_warning(p_s_sb, "journal-2004: Journal cnode memory " 2762 reiserfs_warning(p_s_sb, "journal-2004: Journal cnode memory "
2754 "allocation failed (%ld bytes). Journal is " 2763 "allocation failed (%ld bytes). Journal is "
2755 "too large for available memory. Consider " 2764 "too large for available memory. Consider "
2756 "configuring a smaller journal.", 2765 "configuring a smaller journal.",
2757 sizeof (struct reiserfs_journal_cnode) * num_cnodes); 2766 sizeof (struct reiserfs_journal_cnode) * num_cnodes);
2758 goto free_and_return; 2767 goto free_and_return;
2759 } 2768 }
2760 2769
2761 init_journal_hash(p_s_sb); 2770 init_journal_hash(p_s_sb);
2762 jl = journal->j_current_jl; 2771 jl = journal->j_current_jl;
2763 jl->j_list_bitmap = get_list_bitmap(p_s_sb, jl); 2772 jl->j_list_bitmap = get_list_bitmap(p_s_sb, jl);
2764 if (!jl->j_list_bitmap) { 2773 if (!jl->j_list_bitmap) {
2765 reiserfs_warning(p_s_sb, 2774 reiserfs_warning(p_s_sb,
2766 "journal-2005, get_list_bitmap failed for journal list 0"); 2775 "journal-2005, get_list_bitmap failed for journal list 0");
2767 goto free_and_return; 2776 goto free_and_return;
2768 } 2777 }
2769 if (journal_read(p_s_sb) < 0) { 2778 if (journal_read(p_s_sb) < 0) {
2770 reiserfs_warning(p_s_sb, "Replay failure, unable to mount"); 2779 reiserfs_warning(p_s_sb, "Replay failure, unable to mount");
2771 goto free_and_return; 2780 goto free_and_return;
2772 } 2781 }
2773 2782
2774 reiserfs_mounted_fs_count++; 2783 reiserfs_mounted_fs_count++;
2775 if (reiserfs_mounted_fs_count <= 1) 2784 if (reiserfs_mounted_fs_count <= 1)
2776 commit_wq = create_workqueue("reiserfs"); 2785 commit_wq = create_workqueue("reiserfs");
2777 2786
2778 INIT_WORK(&journal->j_work, flush_async_commits, p_s_sb); 2787 INIT_WORK(&journal->j_work, flush_async_commits, p_s_sb);
2779 return 0; 2788 return 0;
2780 free_and_return: 2789 free_and_return:
2781 free_journal_ram(p_s_sb); 2790 free_journal_ram(p_s_sb);
2782 return 1; 2791 return 1;
2783 } 2792 }
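/*
 * Editor's sketch of the mount-time caller, paraphrased from the
 * reiserfs_fill_super() path (option plumbing and exact message text
 * omitted; not part of this change):
 */
#if 0
	if (journal_init(s, jdev_name, old_format, commit_max_age)) {
		reiserfs_warning(s, "unable to initialize journal space");
		goto error;
	}
#endif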
2784 2793
2785 /* 2794 /*
2786 ** test for a polite end of the current transaction. Used by file_write, and should 2795 ** test for a polite end of the current transaction. Used by file_write, and should
2787 ** be used by delete to make sure they don't write more than can fit inside a single 2796 ** be used by delete to make sure they don't write more than can fit inside a single
2788 ** transaction 2797 ** transaction
2789 */ 2798 */
2790 int journal_transaction_should_end(struct reiserfs_transaction_handle *th, 2799 int journal_transaction_should_end(struct reiserfs_transaction_handle *th,
2791 int new_alloc) 2800 int new_alloc)
2792 { 2801 {
2793 struct reiserfs_journal *journal = SB_JOURNAL(th->t_super); 2802 struct reiserfs_journal *journal = SB_JOURNAL(th->t_super);
2794 time_t now = get_seconds(); 2803 time_t now = get_seconds();
2795 /* cannot restart while nested */ 2804 /* cannot restart while nested */
2796 BUG_ON(!th->t_trans_id); 2805 BUG_ON(!th->t_trans_id);
2797 if (th->t_refcount > 1) 2806 if (th->t_refcount > 1)
2798 return 0; 2807 return 0;
2799 if (journal->j_must_wait > 0 || 2808 if (journal->j_must_wait > 0 ||
2800 (journal->j_len_alloc + new_alloc) >= journal->j_max_batch || 2809 (journal->j_len_alloc + new_alloc) >= journal->j_max_batch ||
2801 atomic_read(&(journal->j_jlock)) || 2810 atomic_read(&(journal->j_jlock)) ||
2802 (now - journal->j_trans_start_time) > journal->j_max_trans_age || 2811 (now - journal->j_trans_start_time) > journal->j_max_trans_age ||
2803 journal->j_cnode_free < (journal->j_trans_max * 3)) { 2812 journal->j_cnode_free < (journal->j_trans_max * 3)) {
2804 return 1; 2813 return 1;
2805 } 2814 }
2806 return 0; 2815 return 0;
2807 } 2816 }
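/*
 * Editor's sketch of the polite-restart pattern this check enables
 * (hypothetical helper for illustration; real callers such as the file
 * write path pair the check with journal_end()/journal_begin()):
 */
#if 0
static int maybe_restart(struct reiserfs_transaction_handle *th,
			 struct super_block *s, int new_alloc, int jbegin_count)
{
	if (journal_transaction_should_end(th, new_alloc)) {
		int err = journal_end(th, s, jbegin_count);
		if (err)
			return err;
		return journal_begin(th, s, jbegin_count);
	}
	return 0;
}
#endif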
2808 2817
2809 /* this must be called inside a transaction, and requires the 2818 /* this must be called inside a transaction, and requires the
2810 ** kernel_lock to be held 2819 ** kernel_lock to be held
2811 */ 2820 */
2812 void reiserfs_block_writes(struct reiserfs_transaction_handle *th) 2821 void reiserfs_block_writes(struct reiserfs_transaction_handle *th)
2813 { 2822 {
2814 struct reiserfs_journal *journal = SB_JOURNAL(th->t_super); 2823 struct reiserfs_journal *journal = SB_JOURNAL(th->t_super);
2815 BUG_ON(!th->t_trans_id); 2824 BUG_ON(!th->t_trans_id);
2816 journal->j_must_wait = 1; 2825 journal->j_must_wait = 1;
2817 set_bit(J_WRITERS_BLOCKED, &journal->j_state); 2826 set_bit(J_WRITERS_BLOCKED, &journal->j_state);
2818 return; 2827 return;
2819 } 2828 }
2820 2829
2821 /* this must be called without a transaction started, and does not 2830 /* this must be called without a transaction started, and does not
2822 ** require BKL 2831 ** require BKL
2823 */ 2832 */
2824 void reiserfs_allow_writes(struct super_block *s) 2833 void reiserfs_allow_writes(struct super_block *s)
2825 { 2834 {
2826 struct reiserfs_journal *journal = SB_JOURNAL(s); 2835 struct reiserfs_journal *journal = SB_JOURNAL(s);
2827 clear_bit(J_WRITERS_BLOCKED, &journal->j_state); 2836 clear_bit(J_WRITERS_BLOCKED, &journal->j_state);
2828 wake_up(&journal->j_join_wait); 2837 wake_up(&journal->j_join_wait);
2829 } 2838 }
2830 2839
2831 /* this must be called without a transaction started, and does not 2840 /* this must be called without a transaction started, and does not
2832 ** require BKL 2841 ** require BKL
2833 */ 2842 */
2834 void reiserfs_wait_on_write_block(struct super_block *s) 2843 void reiserfs_wait_on_write_block(struct super_block *s)
2835 { 2844 {
2836 struct reiserfs_journal *journal = SB_JOURNAL(s); 2845 struct reiserfs_journal *journal = SB_JOURNAL(s);
2837 wait_event(journal->j_join_wait, 2846 wait_event(journal->j_join_wait,
2838 !test_bit(J_WRITERS_BLOCKED, &journal->j_state)); 2847 !test_bit(J_WRITERS_BLOCKED, &journal->j_state));
2839 } 2848 }
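/*
 * Editor's note: reiserfs_block_writes() and reiserfs_allow_writes()
 * bracket periods where the log must stay quiet (the freeze/unfreeze
 * super operations use them this way). A minimal sketch, assuming a
 * running handle th on super block s:
 */
#if 0
	reiserfs_block_writes(th);	/* inside a transaction, BKL held */
	/* ... work that must see no new writers ... */
	reiserfs_allow_writes(s);	/* outside any transaction */
#endif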
2840 2849
2841 static void queue_log_writer(struct super_block *s) 2850 static void queue_log_writer(struct super_block *s)
2842 { 2851 {
2843 wait_queue_t wait; 2852 wait_queue_t wait;
2844 struct reiserfs_journal *journal = SB_JOURNAL(s); 2853 struct reiserfs_journal *journal = SB_JOURNAL(s);
2845 set_bit(J_WRITERS_QUEUED, &journal->j_state); 2854 set_bit(J_WRITERS_QUEUED, &journal->j_state);
2846 2855
2847 /* 2856 /*
2848 * we don't want to use wait_event here because 2857 * we don't want to use wait_event here because
2849 * we only want to wait once. 2858 * we only want to wait once.
2850 */ 2859 */
2851 init_waitqueue_entry(&wait, current); 2860 init_waitqueue_entry(&wait, current);
2852 add_wait_queue(&journal->j_join_wait, &wait); 2861 add_wait_queue(&journal->j_join_wait, &wait);
2853 set_current_state(TASK_UNINTERRUPTIBLE); 2862 set_current_state(TASK_UNINTERRUPTIBLE);
2854 if (test_bit(J_WRITERS_QUEUED, &journal->j_state)) 2863 if (test_bit(J_WRITERS_QUEUED, &journal->j_state))
2855 schedule(); 2864 schedule();
2856 current->state = TASK_RUNNING; 2865 current->state = TASK_RUNNING;
2857 remove_wait_queue(&journal->j_join_wait, &wait); 2866 remove_wait_queue(&journal->j_join_wait, &wait);
2858 } 2867 }
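/*
 * Editor's note: the wakeup cannot be lost here because the task state
 * is set to TASK_UNINTERRUPTIBLE before J_WRITERS_QUEUED is re-tested;
 * if wake_queued_writers() clears the bit and calls wake_up() between
 * the test and schedule(), the task is already on j_join_wait and is
 * simply made runnable again, so schedule() returns promptly.
 */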
2859 2868
2860 static void wake_queued_writers(struct super_block *s) 2869 static void wake_queued_writers(struct super_block *s)
2861 { 2870 {
2862 struct reiserfs_journal *journal = SB_JOURNAL(s); 2871 struct reiserfs_journal *journal = SB_JOURNAL(s);
2863 if (test_and_clear_bit(J_WRITERS_QUEUED, &journal->j_state)) 2872 if (test_and_clear_bit(J_WRITERS_QUEUED, &journal->j_state))
2864 wake_up(&journal->j_join_wait); 2873 wake_up(&journal->j_join_wait);
2865 } 2874 }
2866 2875
2867 static void let_transaction_grow(struct super_block *sb, unsigned long trans_id) 2876 static void let_transaction_grow(struct super_block *sb, unsigned long trans_id)
2868 { 2877 {
2869 struct reiserfs_journal *journal = SB_JOURNAL(sb); 2878 struct reiserfs_journal *journal = SB_JOURNAL(sb);
2870 unsigned long bcount = journal->j_bcount; 2879 unsigned long bcount = journal->j_bcount;
2871 while (1) { 2880 while (1) {
2872 schedule_timeout_uninterruptible(1); 2881 schedule_timeout_uninterruptible(1);
2873 journal->j_current_jl->j_state |= LIST_COMMIT_PENDING; 2882 journal->j_current_jl->j_state |= LIST_COMMIT_PENDING;
2874 while ((atomic_read(&journal->j_wcount) > 0 || 2883 while ((atomic_read(&journal->j_wcount) > 0 ||
2875 atomic_read(&journal->j_jlock)) && 2884 atomic_read(&journal->j_jlock)) &&
2876 journal->j_trans_id == trans_id) { 2885 journal->j_trans_id == trans_id) {
2877 queue_log_writer(sb); 2886 queue_log_writer(sb);
2878 } 2887 }
2879 if (journal->j_trans_id != trans_id) 2888 if (journal->j_trans_id != trans_id)
2880 break; 2889 break;
2881 if (bcount == journal->j_bcount) 2890 if (bcount == journal->j_bcount)
2882 break; 2891 break;
2883 bcount = journal->j_bcount; 2892 bcount = journal->j_bcount;
2884 } 2893 }
2885 } 2894 }
2886 2895
2887 /* join == true if you must join an existing transaction. 2896 /* join == true if you must join an existing transaction.
2888 ** join == false if you can deal with waiting for others to finish 2897 ** join == false if you can deal with waiting for others to finish
2889 ** 2898 **
2890 ** this will block until the transaction is joinable. Pass the number of blocks you 2899 ** this will block until the transaction is joinable. Pass the number of blocks you
2891 ** expect to use in nblocks. 2900 ** expect to use in nblocks.
2892 */ 2901 */
2893 static int do_journal_begin_r(struct reiserfs_transaction_handle *th, 2902 static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
2894 struct super_block *p_s_sb, unsigned long nblocks, 2903 struct super_block *p_s_sb, unsigned long nblocks,
2895 int join) 2904 int join)
2896 { 2905 {
2897 time_t now = get_seconds(); 2906 time_t now = get_seconds();
2898 int old_trans_id; 2907 int old_trans_id;
2899 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 2908 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
2900 struct reiserfs_transaction_handle myth; 2909 struct reiserfs_transaction_handle myth;
2901 int sched_count = 0; 2910 int sched_count = 0;
2902 int retval; 2911 int retval;
2903 2912
2904 reiserfs_check_lock_depth(p_s_sb, "journal_begin"); 2913 reiserfs_check_lock_depth(p_s_sb, "journal_begin");
2905 if (nblocks > journal->j_trans_max) 2914 if (nblocks > journal->j_trans_max)
2906 BUG(); 2915 BUG();
2907 2916
2908 PROC_INFO_INC(p_s_sb, journal.journal_being); 2917 PROC_INFO_INC(p_s_sb, journal.journal_being);
2909 /* set here for journal_join */ 2918 /* set here for journal_join */
2910 th->t_refcount = 1; 2919 th->t_refcount = 1;
2911 th->t_super = p_s_sb; 2920 th->t_super = p_s_sb;
2912 2921
2913 relock: 2922 relock:
2914 lock_journal(p_s_sb); 2923 lock_journal(p_s_sb);
2915 if (join != JBEGIN_ABORT && reiserfs_is_journal_aborted(journal)) { 2924 if (join != JBEGIN_ABORT && reiserfs_is_journal_aborted(journal)) {
2916 unlock_journal(p_s_sb); 2925 unlock_journal(p_s_sb);
2917 retval = journal->j_errno; 2926 retval = journal->j_errno;
2918 goto out_fail; 2927 goto out_fail;
2919 } 2928 }
2920 journal->j_bcount++; 2929 journal->j_bcount++;
2921 2930
2922 if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) { 2931 if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) {
2923 unlock_journal(p_s_sb); 2932 unlock_journal(p_s_sb);
2924 reiserfs_wait_on_write_block(p_s_sb); 2933 reiserfs_wait_on_write_block(p_s_sb);
2925 PROC_INFO_INC(p_s_sb, journal.journal_relock_writers); 2934 PROC_INFO_INC(p_s_sb, journal.journal_relock_writers);
2926 goto relock; 2935 goto relock;
2927 } 2936 }
2928 now = get_seconds(); 2937 now = get_seconds();
2929 2938
2930 /* if there is no room in the journal OR 2939 /* if there is no room in the journal OR
2931 ** if this transaction is too old, and we weren't called joinable, wait for it to finish before beginning. 2940 ** if this transaction is too old, and we weren't called joinable, wait for it to finish before beginning.
2932 ** We don't sleep if there aren't other writers. 2941 ** We don't sleep if there aren't other writers.
2933 */ 2942 */
2934 2943
2935 if ((!join && journal->j_must_wait > 0) || 2944 if ((!join && journal->j_must_wait > 0) ||
2936 (!join 2945 (!join
2937 && (journal->j_len_alloc + nblocks + 2) >= journal->j_max_batch) 2946 && (journal->j_len_alloc + nblocks + 2) >= journal->j_max_batch)
2938 || (!join && atomic_read(&journal->j_wcount) > 0 2947 || (!join && atomic_read(&journal->j_wcount) > 0
2939 && journal->j_trans_start_time > 0 2948 && journal->j_trans_start_time > 0
2940 && (now - journal->j_trans_start_time) > 2949 && (now - journal->j_trans_start_time) >
2941 journal->j_max_trans_age) || (!join 2950 journal->j_max_trans_age) || (!join
2942 && atomic_read(&journal->j_jlock)) 2951 && atomic_read(&journal->j_jlock))
2943 || (!join && journal->j_cnode_free < (journal->j_trans_max * 3))) { 2952 || (!join && journal->j_cnode_free < (journal->j_trans_max * 3))) {
2944 2953
2945 old_trans_id = journal->j_trans_id; 2954 old_trans_id = journal->j_trans_id;
2946 unlock_journal(p_s_sb); /* allow others to finish this transaction */ 2955 unlock_journal(p_s_sb); /* allow others to finish this transaction */
2947 2956
2948 if (!join && (journal->j_len_alloc + nblocks + 2) >= 2957 if (!join && (journal->j_len_alloc + nblocks + 2) >=
2949 journal->j_max_batch && 2958 journal->j_max_batch &&
2950 ((journal->j_len + nblocks + 2) * 100) < 2959 ((journal->j_len + nblocks + 2) * 100) <
2951 (journal->j_len_alloc * 75)) { 2960 (journal->j_len_alloc * 75)) {
2952 if (atomic_read(&journal->j_wcount) > 10) { 2961 if (atomic_read(&journal->j_wcount) > 10) {
2953 sched_count++; 2962 sched_count++;
2954 queue_log_writer(p_s_sb); 2963 queue_log_writer(p_s_sb);
2955 goto relock; 2964 goto relock;
2956 } 2965 }
2957 } 2966 }
2958 /* don't mess with joining the transaction if all we have to do is 2967 /* don't mess with joining the transaction if all we have to do is
2959 * wait for someone else to do a commit 2968 * wait for someone else to do a commit
2960 */ 2969 */
2961 if (atomic_read(&journal->j_jlock)) { 2970 if (atomic_read(&journal->j_jlock)) {
2962 while (journal->j_trans_id == old_trans_id && 2971 while (journal->j_trans_id == old_trans_id &&
2963 atomic_read(&journal->j_jlock)) { 2972 atomic_read(&journal->j_jlock)) {
2964 queue_log_writer(p_s_sb); 2973 queue_log_writer(p_s_sb);
2965 } 2974 }
2966 goto relock; 2975 goto relock;
2967 } 2976 }
2968 retval = journal_join(&myth, p_s_sb, 1); 2977 retval = journal_join(&myth, p_s_sb, 1);
2969 if (retval) 2978 if (retval)
2970 goto out_fail; 2979 goto out_fail;
2971 2980
2972 /* someone might have ended the transaction while we joined */ 2981 /* someone might have ended the transaction while we joined */
2973 if (old_trans_id != journal->j_trans_id) { 2982 if (old_trans_id != journal->j_trans_id) {
2974 retval = do_journal_end(&myth, p_s_sb, 1, 0); 2983 retval = do_journal_end(&myth, p_s_sb, 1, 0);
2975 } else { 2984 } else {
2976 retval = do_journal_end(&myth, p_s_sb, 1, COMMIT_NOW); 2985 retval = do_journal_end(&myth, p_s_sb, 1, COMMIT_NOW);
2977 } 2986 }
2978 2987
2979 if (retval) 2988 if (retval)
2980 goto out_fail; 2989 goto out_fail;
2981 2990
2982 PROC_INFO_INC(p_s_sb, journal.journal_relock_wcount); 2991 PROC_INFO_INC(p_s_sb, journal.journal_relock_wcount);
2983 goto relock; 2992 goto relock;
2984 } 2993 }
2985 /* we are the first writer, set trans_id */ 2994 /* we are the first writer, set trans_id */
2986 if (journal->j_trans_start_time == 0) { 2995 if (journal->j_trans_start_time == 0) {
2987 journal->j_trans_start_time = get_seconds(); 2996 journal->j_trans_start_time = get_seconds();
2988 } 2997 }
2989 atomic_inc(&(journal->j_wcount)); 2998 atomic_inc(&(journal->j_wcount));
2990 journal->j_len_alloc += nblocks; 2999 journal->j_len_alloc += nblocks;
2991 th->t_blocks_logged = 0; 3000 th->t_blocks_logged = 0;
2992 th->t_blocks_allocated = nblocks; 3001 th->t_blocks_allocated = nblocks;
2993 th->t_trans_id = journal->j_trans_id; 3002 th->t_trans_id = journal->j_trans_id;
2994 unlock_journal(p_s_sb); 3003 unlock_journal(p_s_sb);
2995 INIT_LIST_HEAD(&th->t_list); 3004 INIT_LIST_HEAD(&th->t_list);
2996 get_fs_excl(); 3005 get_fs_excl();
2997 return 0; 3006 return 0;
2998 3007
2999 out_fail: 3008 out_fail:
3000 memset(th, 0, sizeof(*th)); 3009 memset(th, 0, sizeof(*th));
3001 /* Re-set th->t_super, so we can properly keep track of how many 3010 /* Re-set th->t_super, so we can properly keep track of how many
3002 * persistent transactions there are. We need to do this so if this 3011 * persistent transactions there are. We need to do this so if this
3003 * call is part of a failed restart_transaction, we can free it later */ 3012 * call is part of a failed restart_transaction, we can free it later */
3004 th->t_super = p_s_sb; 3013 th->t_super = p_s_sb;
3005 return retval; 3014 return retval;
3006 } 3015 }
3007 3016
3008 struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct 3017 struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct
3009 super_block 3018 super_block
3010 *s, 3019 *s,
3011 int nblocks) 3020 int nblocks)
3012 { 3021 {
3013 int ret; 3022 int ret;
3014 struct reiserfs_transaction_handle *th; 3023 struct reiserfs_transaction_handle *th;
3015 3024
3016 /* if we're nesting into an existing transaction, it will be 3025 /* if we're nesting into an existing transaction, it will be
3017 ** persistent on its own 3026 ** persistent on its own
3018 */ 3027 */
3019 if (reiserfs_transaction_running(s)) { 3028 if (reiserfs_transaction_running(s)) {
3020 th = current->journal_info; 3029 th = current->journal_info;
3021 th->t_refcount++; 3030 th->t_refcount++;
3022 if (th->t_refcount < 2) { 3031 if (th->t_refcount < 2) {
3023 BUG(); 3032 BUG();
3024 } 3033 }
3025 return th; 3034 return th;
3026 } 3035 }
3027 th = kmalloc(sizeof(struct reiserfs_transaction_handle), GFP_NOFS); 3036 th = kmalloc(sizeof(struct reiserfs_transaction_handle), GFP_NOFS);
3028 if (!th) 3037 if (!th)
3029 return NULL; 3038 return NULL;
3030 ret = journal_begin(th, s, nblocks); 3039 ret = journal_begin(th, s, nblocks);
3031 if (ret) { 3040 if (ret) {
3032 kfree(th); 3041 kfree(th);
3033 return NULL; 3042 return NULL;
3034 } 3043 }
3035 3044
3036 SB_JOURNAL(s)->j_persistent_trans++; 3045 SB_JOURNAL(s)->j_persistent_trans++;
3037 return th; 3046 return th;
3038 } 3047 }
3039 3048
3040 int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *th) 3049 int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *th)
3041 { 3050 {
3042 struct super_block *s = th->t_super; 3051 struct super_block *s = th->t_super;
3043 int ret = 0; 3052 int ret = 0;
3044 if (th->t_trans_id) 3053 if (th->t_trans_id)
3045 ret = journal_end(th, th->t_super, th->t_blocks_allocated); 3054 ret = journal_end(th, th->t_super, th->t_blocks_allocated);
3046 else 3055 else
3047 ret = -EIO; 3056 ret = -EIO;
3048 if (th->t_refcount == 0) { 3057 if (th->t_refcount == 0) {
3049 SB_JOURNAL(s)->j_persistent_trans--; 3058 SB_JOURNAL(s)->j_persistent_trans--;
3050 kfree(th); 3059 kfree(th);
3051 } 3060 }
3052 return ret; 3061 return ret;
3053 } 3062 }
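/*
 * Editor's sketch of the persistent-transaction API (hypothetical
 * caller; this pattern lets a handle outlive the function that started
 * it, e.g. across direct-IO block allocation):
 */
#if 0
	struct reiserfs_transaction_handle *th;

	th = reiserfs_persistent_transaction(s, jbegin_count);
	if (!th)
		return -ENOMEM;
	/* ... allocate blocks, journal_mark_dirty(), etc. ... */
	retval = reiserfs_end_persistent_transaction(th);
#endif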
3054 3063
3055 static int journal_join(struct reiserfs_transaction_handle *th, 3064 static int journal_join(struct reiserfs_transaction_handle *th,
3056 struct super_block *p_s_sb, unsigned long nblocks) 3065 struct super_block *p_s_sb, unsigned long nblocks)
3057 { 3066 {
3058 struct reiserfs_transaction_handle *cur_th = current->journal_info; 3067 struct reiserfs_transaction_handle *cur_th = current->journal_info;
3059 3068
3060 /* this keeps do_journal_end from NULLing out the current->journal_info 3069 /* this keeps do_journal_end from NULLing out the current->journal_info
3061 ** pointer 3070 ** pointer
3062 */ 3071 */
3063 th->t_handle_save = cur_th; 3072 th->t_handle_save = cur_th;
3064 if (cur_th && cur_th->t_refcount > 1) { 3073 if (cur_th && cur_th->t_refcount > 1) {
3065 BUG(); 3074 BUG();
3066 } 3075 }
3067 return do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_JOIN); 3076 return do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_JOIN);
3068 } 3077 }
3069 3078
3070 int journal_join_abort(struct reiserfs_transaction_handle *th, 3079 int journal_join_abort(struct reiserfs_transaction_handle *th,
3071 struct super_block *p_s_sb, unsigned long nblocks) 3080 struct super_block *p_s_sb, unsigned long nblocks)
3072 { 3081 {
3073 struct reiserfs_transaction_handle *cur_th = current->journal_info; 3082 struct reiserfs_transaction_handle *cur_th = current->journal_info;
3074 3083
3075 /* this keeps do_journal_end from NULLing out the current->journal_info 3084 /* this keeps do_journal_end from NULLing out the current->journal_info
3076 ** pointer 3085 ** pointer
3077 */ 3086 */
3078 th->t_handle_save = cur_th; 3087 th->t_handle_save = cur_th;
3079 if (cur_th && cur_th->t_refcount > 1) { 3088 if (cur_th && cur_th->t_refcount > 1) {
3080 BUG(); 3089 BUG();
3081 } 3090 }
3082 return do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_ABORT); 3091 return do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_ABORT);
3083 } 3092 }
3084 3093
3085 int journal_begin(struct reiserfs_transaction_handle *th, 3094 int journal_begin(struct reiserfs_transaction_handle *th,
3086 struct super_block *p_s_sb, unsigned long nblocks) 3095 struct super_block *p_s_sb, unsigned long nblocks)
3087 { 3096 {
3088 struct reiserfs_transaction_handle *cur_th = current->journal_info; 3097 struct reiserfs_transaction_handle *cur_th = current->journal_info;
3089 int ret; 3098 int ret;
3090 3099
3091 th->t_handle_save = NULL; 3100 th->t_handle_save = NULL;
3092 if (cur_th) { 3101 if (cur_th) {
3093 /* we are nesting into the current transaction */ 3102 /* we are nesting into the current transaction */
3094 if (cur_th->t_super == p_s_sb) { 3103 if (cur_th->t_super == p_s_sb) {
3095 BUG_ON(!cur_th->t_refcount); 3104 BUG_ON(!cur_th->t_refcount);
3096 cur_th->t_refcount++; 3105 cur_th->t_refcount++;
3097 memcpy(th, cur_th, sizeof(*th)); 3106 memcpy(th, cur_th, sizeof(*th));
3098 if (th->t_refcount <= 1) 3107 if (th->t_refcount <= 1)
3099 reiserfs_warning(p_s_sb, 3108 reiserfs_warning(p_s_sb,
3100 "BAD: refcount <= 1, but journal_info != 0"); 3109 "BAD: refcount <= 1, but journal_info != 0");
3101 return 0; 3110 return 0;
3102 } else { 3111 } else {
3103 /* we've ended up with a handle from a different filesystem. 3112 /* we've ended up with a handle from a different filesystem.
3104 ** save it and restore on journal_end. This should never 3113 ** save it and restore on journal_end. This should never
3105 ** really happen... 3114 ** really happen...
3106 */ 3115 */
3107 reiserfs_warning(p_s_sb, 3116 reiserfs_warning(p_s_sb,
3108 "clm-2100: nesting into a different FS"); 3117 "clm-2100: nesting into a different FS");
3109 th->t_handle_save = current->journal_info; 3118 th->t_handle_save = current->journal_info;
3110 current->journal_info = th; 3119 current->journal_info = th;
3111 } 3120 }
3112 } else { 3121 } else {
3113 current->journal_info = th; 3122 current->journal_info = th;
3114 } 3123 }
3115 ret = do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_REG); 3124 ret = do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_REG);
3116 if (current->journal_info != th) 3125 if (current->journal_info != th)
3117 BUG(); 3126 BUG();
3118 3127
3119 /* I guess this boils down to being the reciprocal of clm-2100 above. 3128 /* I guess this boils down to being the reciprocal of clm-2100 above.
3120 * If do_journal_begin_r fails, we need to put it back, since journal_end 3129 * If do_journal_begin_r fails, we need to put it back, since journal_end
3121 * won't be called to do it. */ 3130 * won't be called to do it. */
3122 if (ret) 3131 if (ret)
3123 current->journal_info = th->t_handle_save; 3132 current->journal_info = th->t_handle_save;
3124 else 3133 else
3125 BUG_ON(!th->t_refcount); 3134 BUG_ON(!th->t_refcount);
3126 3135
3127 return ret; 3136 return ret;
3128 } 3137 }
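/*
 * Editor's note on nesting (hypothetical sketch): journal_begin() bumps
 * t_refcount when a handle for the same super block is already running,
 * so begin/end pairs may nest; only the outermost end commits.
 */
#if 0
	journal_begin(&th, s, n);	/* refcount 1, transaction starts */
	journal_begin(&th2, s, m);	/* refcount 2, joins the same handle */
	journal_end(&th2, s, m);	/* refcount 1, nothing committed */
	journal_end(&th, s, n);		/* refcount 0, do_journal_end() runs */
#endif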
3129 3138
3130 /* 3139 /*
3131 ** puts bh into the current transaction. If it was already there, it removes the 3140 ** puts bh into the current transaction. If it was already there, it removes the
3132 ** old pointers from the hash and puts new ones in (to make sure replay happens in the right order). 3141 ** old pointers from the hash and puts new ones in (to make sure replay happens in the right order).
3133 ** 3142 **
3134 ** if it was dirty, cleans it and files it onto the clean list. I can't let it be dirty again until the 3143 ** if it was dirty, cleans it and files it onto the clean list. I can't let it be dirty again until the
3135 ** transaction is committed. 3144 ** transaction is committed.
3136 ** 3145 **
3137 ** if j_len is bigger than j_len_alloc, it pushes j_len_alloc to j_len + JOURNAL_PER_BALANCE_CNT. 3146 ** if j_len is bigger than j_len_alloc, it pushes j_len_alloc to j_len + JOURNAL_PER_BALANCE_CNT.
3138 */ 3147 */
3139 int journal_mark_dirty(struct reiserfs_transaction_handle *th, 3148 int journal_mark_dirty(struct reiserfs_transaction_handle *th,
3140 struct super_block *p_s_sb, struct buffer_head *bh) 3149 struct super_block *p_s_sb, struct buffer_head *bh)
3141 { 3150 {
3142 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 3151 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
3143 struct reiserfs_journal_cnode *cn = NULL; 3152 struct reiserfs_journal_cnode *cn = NULL;
3144 int count_already_incd = 0; 3153 int count_already_incd = 0;
3145 int prepared = 0; 3154 int prepared = 0;
3146 BUG_ON(!th->t_trans_id); 3155 BUG_ON(!th->t_trans_id);
3147 3156
3148 PROC_INFO_INC(p_s_sb, journal.mark_dirty); 3157 PROC_INFO_INC(p_s_sb, journal.mark_dirty);
3149 if (th->t_trans_id != journal->j_trans_id) { 3158 if (th->t_trans_id != journal->j_trans_id) {
3150 reiserfs_panic(th->t_super, 3159 reiserfs_panic(th->t_super,
3151 "journal-1577: handle trans id %ld != current trans id %ld\n", 3160 "journal-1577: handle trans id %ld != current trans id %ld\n",
3152 th->t_trans_id, journal->j_trans_id); 3161 th->t_trans_id, journal->j_trans_id);
3153 } 3162 }
3154 3163
3155 p_s_sb->s_dirt = 1; 3164 p_s_sb->s_dirt = 1;
3156 3165
3157 prepared = test_clear_buffer_journal_prepared(bh); 3166 prepared = test_clear_buffer_journal_prepared(bh);
3158 clear_buffer_journal_restore_dirty(bh); 3167 clear_buffer_journal_restore_dirty(bh);
3159 /* already in this transaction, we are done */ 3168 /* already in this transaction, we are done */
3160 if (buffer_journaled(bh)) { 3169 if (buffer_journaled(bh)) {
3161 PROC_INFO_INC(p_s_sb, journal.mark_dirty_already); 3170 PROC_INFO_INC(p_s_sb, journal.mark_dirty_already);
3162 return 0; 3171 return 0;
3163 } 3172 }
3164 3173
3165 /* this must be turned into a panic instead of a warning. We can't allow 3174 /* this must be turned into a panic instead of a warning. We can't allow
3166 ** a dirty or journal_dirty or locked buffer to be logged, as some changes 3175 ** a dirty or journal_dirty or locked buffer to be logged, as some changes
3167 ** could get to disk too early. NOT GOOD. 3176 ** could get to disk too early. NOT GOOD.
3168 */ 3177 */
3169 if (!prepared || buffer_dirty(bh)) { 3178 if (!prepared || buffer_dirty(bh)) {
3170 reiserfs_warning(p_s_sb, "journal-1777: buffer %llu bad state " 3179 reiserfs_warning(p_s_sb, "journal-1777: buffer %llu bad state "
3171 "%cPREPARED %cLOCKED %cDIRTY %cJDIRTY_WAIT", 3180 "%cPREPARED %cLOCKED %cDIRTY %cJDIRTY_WAIT",
3172 (unsigned long long)bh->b_blocknr, 3181 (unsigned long long)bh->b_blocknr,
3173 prepared ? ' ' : '!', 3182 prepared ? ' ' : '!',
3174 buffer_locked(bh) ? ' ' : '!', 3183 buffer_locked(bh) ? ' ' : '!',
3175 buffer_dirty(bh) ? ' ' : '!', 3184 buffer_dirty(bh) ? ' ' : '!',
3176 buffer_journal_dirty(bh) ? ' ' : '!'); 3185 buffer_journal_dirty(bh) ? ' ' : '!');
3177 } 3186 }
3178 3187
3179 if (atomic_read(&(journal->j_wcount)) <= 0) { 3188 if (atomic_read(&(journal->j_wcount)) <= 0) {
3180 reiserfs_warning(p_s_sb, 3189 reiserfs_warning(p_s_sb,
3181 "journal-1409: journal_mark_dirty returning because j_wcount was %d", 3190 "journal-1409: journal_mark_dirty returning because j_wcount was %d",
3182 atomic_read(&(journal->j_wcount))); 3191 atomic_read(&(journal->j_wcount)));
3183 return 1; 3192 return 1;
3184 } 3193 }
3185 /* this error means I've screwed up, and we've overflowed the transaction. 3194 /* this error means I've screwed up, and we've overflowed the transaction.
3186 ** Nothing can be done here, except make the FS readonly or panic. 3195 ** Nothing can be done here, except make the FS readonly or panic.
3187 */ 3196 */
3188 if (journal->j_len >= journal->j_trans_max) { 3197 if (journal->j_len >= journal->j_trans_max) {
3189 reiserfs_panic(th->t_super, 3198 reiserfs_panic(th->t_super,
3190 "journal-1413: journal_mark_dirty: j_len (%lu) is too big\n", 3199 "journal-1413: journal_mark_dirty: j_len (%lu) is too big\n",
3191 journal->j_len); 3200 journal->j_len);
3192 } 3201 }
3193 3202
3194 if (buffer_journal_dirty(bh)) { 3203 if (buffer_journal_dirty(bh)) {
3195 count_already_incd = 1; 3204 count_already_incd = 1;
3196 PROC_INFO_INC(p_s_sb, journal.mark_dirty_notjournal); 3205 PROC_INFO_INC(p_s_sb, journal.mark_dirty_notjournal);
3197 clear_buffer_journal_dirty(bh); 3206 clear_buffer_journal_dirty(bh);
3198 } 3207 }
3199 3208
3200 if (journal->j_len > journal->j_len_alloc) { 3209 if (journal->j_len > journal->j_len_alloc) {
3201 journal->j_len_alloc = journal->j_len + JOURNAL_PER_BALANCE_CNT; 3210 journal->j_len_alloc = journal->j_len + JOURNAL_PER_BALANCE_CNT;
3202 } 3211 }
3203 3212
3204 set_buffer_journaled(bh); 3213 set_buffer_journaled(bh);
3205 3214
3206 /* now put this guy on the end */ 3215 /* now put this guy on the end */
3207 if (!cn) { 3216 if (!cn) {
3208 cn = get_cnode(p_s_sb); 3217 cn = get_cnode(p_s_sb);
3209 if (!cn) { 3218 if (!cn) {
3210 reiserfs_panic(p_s_sb, "get_cnode failed!\n"); 3219 reiserfs_panic(p_s_sb, "get_cnode failed!\n");
3211 } 3220 }
3212 3221
3213 if (th->t_blocks_logged == th->t_blocks_allocated) { 3222 if (th->t_blocks_logged == th->t_blocks_allocated) {
3214 th->t_blocks_allocated += JOURNAL_PER_BALANCE_CNT; 3223 th->t_blocks_allocated += JOURNAL_PER_BALANCE_CNT;
3215 journal->j_len_alloc += JOURNAL_PER_BALANCE_CNT; 3224 journal->j_len_alloc += JOURNAL_PER_BALANCE_CNT;
3216 } 3225 }
3217 th->t_blocks_logged++; 3226 th->t_blocks_logged++;
3218 journal->j_len++; 3227 journal->j_len++;
3219 3228
3220 cn->bh = bh; 3229 cn->bh = bh;
3221 cn->blocknr = bh->b_blocknr; 3230 cn->blocknr = bh->b_blocknr;
3222 cn->sb = p_s_sb; 3231 cn->sb = p_s_sb;
3223 cn->jlist = NULL; 3232 cn->jlist = NULL;
3224 insert_journal_hash(journal->j_hash_table, cn); 3233 insert_journal_hash(journal->j_hash_table, cn);
3225 if (!count_already_incd) { 3234 if (!count_already_incd) {
3226 get_bh(bh); 3235 get_bh(bh);
3227 } 3236 }
3228 } 3237 }
3229 cn->next = NULL; 3238 cn->next = NULL;
3230 cn->prev = journal->j_last; 3239 cn->prev = journal->j_last;
3231 cn->bh = bh; 3240 cn->bh = bh;
3232 if (journal->j_last) { 3241 if (journal->j_last) {
3233 journal->j_last->next = cn; 3242 journal->j_last->next = cn;
3234 journal->j_last = cn; 3243 journal->j_last = cn;
3235 } else { 3244 } else {
3236 journal->j_first = cn; 3245 journal->j_first = cn;
3237 journal->j_last = cn; 3246 journal->j_last = cn;
3238 } 3247 }
3239 return 0; 3248 return 0;
3240 } 3249 }
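/*
 * Editor's sketch of the canonical logging sequence around
 * journal_mark_dirty() (helpers are the real ones from this file;
 * error handling omitted):
 */
#if 0
	journal_begin(&th, s, jbegin_count);
	reiserfs_prepare_for_journal(s, bh, 1);	/* marks bh journal_prepared */
	/* ... modify bh->b_data ... */
	journal_mark_dirty(&th, s, bh);		/* log the change */
	journal_end(&th, s, jbegin_count);
#endif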
3241 3250
3242 int journal_end(struct reiserfs_transaction_handle *th, 3251 int journal_end(struct reiserfs_transaction_handle *th,
3243 struct super_block *p_s_sb, unsigned long nblocks) 3252 struct super_block *p_s_sb, unsigned long nblocks)
3244 { 3253 {
3245 if (!current->journal_info && th->t_refcount > 1) 3254 if (!current->journal_info && th->t_refcount > 1)
3246 reiserfs_warning(p_s_sb, "REISER-NESTING: th NULL, refcount %d", 3255 reiserfs_warning(p_s_sb, "REISER-NESTING: th NULL, refcount %d",
3247 th->t_refcount); 3256 th->t_refcount);
3248 3257
3249 if (!th->t_trans_id) { 3258 if (!th->t_trans_id) {
3250 WARN_ON(1); 3259 WARN_ON(1);
3251 return -EIO; 3260 return -EIO;
3252 } 3261 }
3253 3262
3254 th->t_refcount--; 3263 th->t_refcount--;
3255 if (th->t_refcount > 0) { 3264 if (th->t_refcount > 0) {
3256 struct reiserfs_transaction_handle *cur_th = 3265 struct reiserfs_transaction_handle *cur_th =
3257 current->journal_info; 3266 current->journal_info;
3258 3267
3259 /* we aren't allowed to close a nested transaction on a different 3268 /* we aren't allowed to close a nested transaction on a different
3260 ** filesystem from the one in the task struct 3269 ** filesystem from the one in the task struct
3261 */ 3270 */
3262 if (cur_th->t_super != th->t_super) 3271 if (cur_th->t_super != th->t_super)
3263 BUG(); 3272 BUG();
3264 3273
3265 if (th != cur_th) { 3274 if (th != cur_th) {
3266 memcpy(current->journal_info, th, sizeof(*th)); 3275 memcpy(current->journal_info, th, sizeof(*th));
3267 th->t_trans_id = 0; 3276 th->t_trans_id = 0;
3268 } 3277 }
3269 return 0; 3278 return 0;
3270 } else { 3279 } else {
3271 return do_journal_end(th, p_s_sb, nblocks, 0); 3280 return do_journal_end(th, p_s_sb, nblocks, 0);
3272 } 3281 }
3273 } 3282 }
3274 3283
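The refcount handling above is what makes nested transactions cheap: inner journal_end calls only drop the count (copying the handle back into journal_info when needed), and only the outermost end reaches do_journal_end. A toy model of just the counting, with all journal state elided:

#include <stdio.h>

struct handle { int refcount; };

static void toy_begin(struct handle *h)
{
	h->refcount++;		/* a nested begin just bumps the count */
}

static void toy_end(struct handle *h)
{
	if (--h->refcount > 0) {
		printf("nested end, nothing committed\n");
		return;
	}
	printf("outermost end, commit happens here\n");
}

int main(void)
{
	struct handle h = { 0 };
	toy_begin(&h);
	toy_begin(&h);	/* nested transaction */
	toy_end(&h);	/* inner end */
	toy_end(&h);	/* outer end triggers the commit */
	return 0;
}
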
3275 /* removes from the current transaction, releasing and decrementing any counters. 3284 /* removes from the current transaction, releasing and decrementing any counters.
3276 ** also files the removed buffer directly onto the clean list 3285 ** also files the removed buffer directly onto the clean list
3277 ** 3286 **
3278 ** called by journal_mark_freed when a block has been deleted 3287 ** called by journal_mark_freed when a block has been deleted
3279 ** 3288 **
3280 ** returns 1 if it cleaned and released the buffer. 0 otherwise 3289 ** returns 1 if it cleaned and released the buffer. 0 otherwise
3281 */ 3290 */
3282 static int remove_from_transaction(struct super_block *p_s_sb, 3291 static int remove_from_transaction(struct super_block *p_s_sb,
3283 b_blocknr_t blocknr, int already_cleaned) 3292 b_blocknr_t blocknr, int already_cleaned)
3284 { 3293 {
3285 struct buffer_head *bh; 3294 struct buffer_head *bh;
3286 struct reiserfs_journal_cnode *cn; 3295 struct reiserfs_journal_cnode *cn;
3287 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 3296 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
3288 int ret = 0; 3297 int ret = 0;
3289 3298
3290 cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, blocknr); 3299 cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, blocknr);
3291 if (!cn || !cn->bh) { 3300 if (!cn || !cn->bh) {
3292 return ret; 3301 return ret;
3293 } 3302 }
3294 bh = cn->bh; 3303 bh = cn->bh;
3295 if (cn->prev) { 3304 if (cn->prev) {
3296 cn->prev->next = cn->next; 3305 cn->prev->next = cn->next;
3297 } 3306 }
3298 if (cn->next) { 3307 if (cn->next) {
3299 cn->next->prev = cn->prev; 3308 cn->next->prev = cn->prev;
3300 } 3309 }
3301 if (cn == journal->j_first) { 3310 if (cn == journal->j_first) {
3302 journal->j_first = cn->next; 3311 journal->j_first = cn->next;
3303 } 3312 }
3304 if (cn == journal->j_last) { 3313 if (cn == journal->j_last) {
3305 journal->j_last = cn->prev; 3314 journal->j_last = cn->prev;
3306 } 3315 }
3307 if (bh) 3316 if (bh)
3308 remove_journal_hash(p_s_sb, journal->j_hash_table, NULL, 3317 remove_journal_hash(p_s_sb, journal->j_hash_table, NULL,
3309 bh->b_blocknr, 0); 3318 bh->b_blocknr, 0);
3310 clear_buffer_journaled(bh); /* don't log this one */ 3319 clear_buffer_journaled(bh); /* don't log this one */
3311 3320
3312 if (!already_cleaned) { 3321 if (!already_cleaned) {
3313 clear_buffer_journal_dirty(bh); 3322 clear_buffer_journal_dirty(bh);
3314 clear_buffer_dirty(bh); 3323 clear_buffer_dirty(bh);
3315 clear_buffer_journal_test(bh); 3324 clear_buffer_journal_test(bh);
3316 put_bh(bh); 3325 put_bh(bh);
3317 if (atomic_read(&(bh->b_count)) < 0) { 3326 if (atomic_read(&(bh->b_count)) < 0) {
3318 reiserfs_warning(p_s_sb, 3327 reiserfs_warning(p_s_sb,
3319 "journal-1752: remove from trans, b_count < 0"); 3328 "journal-1752: remove from trans, b_count < 0");
3320 } 3329 }
3321 ret = 1; 3330 ret = 1;
3322 } 3331 }
3323 journal->j_len--; 3332 journal->j_len--;
3324 journal->j_len_alloc--; 3333 journal->j_len_alloc--;
3325 free_cnode(p_s_sb, cn); 3334 free_cnode(p_s_sb, cn);
3326 return ret; 3335 return ret;
3327 } 3336 }
3328 3337
3329 /* 3338 /*
3330 ** for any cnode in a journal list, it can only be dirtied if all the 3339 ** for any cnode in a journal list, it can only be dirtied if all the
3331 ** transactions that include it are committed to disk. 3340 ** transactions that include it are committed to disk.
3332 ** this checks through each transaction, and returns 1 if you are allowed to dirty, 3341 ** this checks through each transaction, and returns 1 if you are allowed to dirty,
3333 ** and 0 if you aren't 3342 ** and 0 if you aren't
3334 ** 3343 **
3335 ** it is called by dirty_journal_list, which is called after flush_commit_list has gotten all the log 3344 ** it is called by dirty_journal_list, which is called after flush_commit_list has gotten all the log
3336 ** blocks for a given transaction on disk 3345 ** blocks for a given transaction on disk
3337 ** 3346 **
3338 */ 3347 */
3339 static int can_dirty(struct reiserfs_journal_cnode *cn) 3348 static int can_dirty(struct reiserfs_journal_cnode *cn)
3340 { 3349 {
3341 struct super_block *sb = cn->sb; 3350 struct super_block *sb = cn->sb;
3342 b_blocknr_t blocknr = cn->blocknr; 3351 b_blocknr_t blocknr = cn->blocknr;
3343 struct reiserfs_journal_cnode *cur = cn->hprev; 3352 struct reiserfs_journal_cnode *cur = cn->hprev;
3344 int can_dirty = 1; 3353 int can_dirty = 1;
3345 3354
3346 /* first test hprev. These are all newer than cn, so any node here 3355 /* first test hprev. These are all newer than cn, so any node here
3347 ** with the same block number and dev means this node can't be sent 3356 ** with the same block number and dev means this node can't be sent
3348 ** to disk right now. 3357 ** to disk right now.
3349 */ 3358 */
3350 while (cur && can_dirty) { 3359 while (cur && can_dirty) {
3351 if (cur->jlist && cur->bh && cur->blocknr && cur->sb == sb && 3360 if (cur->jlist && cur->bh && cur->blocknr && cur->sb == sb &&
3352 cur->blocknr == blocknr) { 3361 cur->blocknr == blocknr) {
3353 can_dirty = 0; 3362 can_dirty = 0;
3354 } 3363 }
3355 cur = cur->hprev; 3364 cur = cur->hprev;
3356 } 3365 }
3357 /* then test hnext. These are all older than cn. As long as they 3366 /* then test hnext. These are all older than cn. As long as they
3358 ** are committed to the log, it is safe to write cn to disk 3367 ** are committed to the log, it is safe to write cn to disk
3359 */ 3368 */
3360 cur = cn->hnext; 3369 cur = cn->hnext;
3361 while (cur && can_dirty) { 3370 while (cur && can_dirty) {
3362 if (cur->jlist && cur->jlist->j_len > 0 && 3371 if (cur->jlist && cur->jlist->j_len > 0 &&
3363 atomic_read(&(cur->jlist->j_commit_left)) > 0 && cur->bh && 3372 atomic_read(&(cur->jlist->j_commit_left)) > 0 && cur->bh &&
3364 cur->blocknr && cur->sb == sb && cur->blocknr == blocknr) { 3373 cur->blocknr && cur->sb == sb && cur->blocknr == blocknr) {
3365 can_dirty = 0; 3374 can_dirty = 0;
3366 } 3375 }
3367 cur = cur->hnext; 3376 cur = cur->hnext;
3368 } 3377 }
3369 return can_dirty; 3378 return can_dirty;
3370 } 3379 }
3371 3380
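A compressed model of can_dirty's two hash-chain scans, assuming a flattened entry type in place of the cnode/journal-list pair (the jlist and bh checks are folded into a single commit_left field):

#include <stdio.h>

struct entry {
	struct entry *hprev;	/* newer entries for this hash bucket */
	struct entry *hnext;	/* older entries */
	long blocknr;
	int commit_left;	/* >0: owning transaction not yet on disk */
};

static int toy_can_dirty(struct entry *e)
{
	/* any newer copy of the block forbids dirtying */
	for (struct entry *c = e->hprev; c; c = c->hprev)
		if (c->blocknr == e->blocknr)
			return 0;
	/* older copies forbid it only while still committing */
	for (struct entry *c = e->hnext; c; c = c->hnext)
		if (c->blocknr == e->blocknr && c->commit_left > 0)
			return 0;
	return 1;
}

int main(void)
{
	struct entry older = { NULL, NULL, 7, 1 };
	struct entry cur = { NULL, &older, 7, 0 };
	older.hprev = &cur;
	printf("%d\n", toy_can_dirty(&cur));	/* 0: older trans still committing */
	older.commit_left = 0;
	printf("%d\n", toy_can_dirty(&cur));	/* 1: safe to dirty now */
	return 0;
}
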
3372 /* syncs the commit blocks, but does not force the real buffers to disk 3381 /* syncs the commit blocks, but does not force the real buffers to disk
3373 ** will wait until the current transaction is done/committed before returning 3382 ** will wait until the current transaction is done/committed before returning
3374 */ 3383 */
3375 int journal_end_sync(struct reiserfs_transaction_handle *th, 3384 int journal_end_sync(struct reiserfs_transaction_handle *th,
3376 struct super_block *p_s_sb, unsigned long nblocks) 3385 struct super_block *p_s_sb, unsigned long nblocks)
3377 { 3386 {
3378 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 3387 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
3379 3388
3380 BUG_ON(!th->t_trans_id); 3389 BUG_ON(!th->t_trans_id);
3381 /* you can't sync while nested, very, very bad */ 3390 /* you can't sync while nested, very, very bad */
3382 if (th->t_refcount > 1) { 3391 if (th->t_refcount > 1) {
3383 BUG(); 3392 BUG();
3384 } 3393 }
3385 if (journal->j_len == 0) { 3394 if (journal->j_len == 0) {
3386 reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 3395 reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb),
3387 1); 3396 1);
3388 journal_mark_dirty(th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)); 3397 journal_mark_dirty(th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb));
3389 } 3398 }
3390 return do_journal_end(th, p_s_sb, nblocks, COMMIT_NOW | WAIT); 3399 return do_journal_end(th, p_s_sb, nblocks, COMMIT_NOW | WAIT);
3391 } 3400 }
3392 3401
3393 /* 3402 /*
3394 ** writeback the pending async commits to disk 3403 ** writeback the pending async commits to disk
3395 */ 3404 */
3396 static void flush_async_commits(void *p) 3405 static void flush_async_commits(void *p)
3397 { 3406 {
3398 struct super_block *p_s_sb = p; 3407 struct super_block *p_s_sb = p;
3399 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 3408 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
3400 struct reiserfs_journal_list *jl; 3409 struct reiserfs_journal_list *jl;
3401 struct list_head *entry; 3410 struct list_head *entry;
3402 3411
3403 lock_kernel(); 3412 lock_kernel();
3404 if (!list_empty(&journal->j_journal_list)) { 3413 if (!list_empty(&journal->j_journal_list)) {
3405 /* last entry is the youngest, commit it and you get everything */ 3414 /* last entry is the youngest, commit it and you get everything */
3406 entry = journal->j_journal_list.prev; 3415 entry = journal->j_journal_list.prev;
3407 jl = JOURNAL_LIST_ENTRY(entry); 3416 jl = JOURNAL_LIST_ENTRY(entry);
3408 flush_commit_list(p_s_sb, jl, 1); 3417 flush_commit_list(p_s_sb, jl, 1);
3409 } 3418 }
3410 unlock_kernel(); 3419 unlock_kernel();
3411 /* 3420 /*
3412 * this is a little racy, but there's no harm in missing 3421 * this is a little racy, but there's no harm in missing
3413 * the filemap_fdatawrite 3422 * the filemap_fdatawrite
3414 */ 3423 */
3415 if (!atomic_read(&journal->j_async_throttle) 3424 if (!atomic_read(&journal->j_async_throttle)
3416 && !reiserfs_is_journal_aborted(journal)) { 3425 && !reiserfs_is_journal_aborted(journal)) {
3417 atomic_inc(&journal->j_async_throttle); 3426 atomic_inc(&journal->j_async_throttle);
3418 filemap_fdatawrite(p_s_sb->s_bdev->bd_inode->i_mapping); 3427 filemap_fdatawrite(p_s_sb->s_bdev->bd_inode->i_mapping);
3419 atomic_dec(&journal->j_async_throttle); 3428 atomic_dec(&journal->j_async_throttle);
3420 } 3429 }
3421 } 3430 }
3422 3431
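The j_async_throttle guard above is deliberately best-effort: the read and the increment are separate steps, so two tasks can both pass the check. That is tolerable here because an extra (or skipped) writeback is harmless. A user-space sketch of the pattern, with printf standing in for the filemap_fdatawrite call:

#include <stdatomic.h>
#include <stdio.h>

static atomic_int throttle;

static void maybe_writeback(void)
{
	if (atomic_load(&throttle) == 0) {	/* racy check, by design */
		atomic_fetch_add(&throttle, 1);
		printf("issuing writeback\n");
		atomic_fetch_sub(&throttle, 1);
	}
}

int main(void)
{
	maybe_writeback();
	return 0;
}
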
3423 /* 3432 /*
3424 ** flushes any old transactions to disk 3433 ** flushes any old transactions to disk
3425 ** ends the current transaction if it is too old 3434 ** ends the current transaction if it is too old
3426 */ 3435 */
3427 int reiserfs_flush_old_commits(struct super_block *p_s_sb) 3436 int reiserfs_flush_old_commits(struct super_block *p_s_sb)
3428 { 3437 {
3429 time_t now; 3438 time_t now;
3430 struct reiserfs_transaction_handle th; 3439 struct reiserfs_transaction_handle th;
3431 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 3440 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
3432 3441
3433 now = get_seconds(); 3442 now = get_seconds();
3434 /* safety check so we don't flush while we are replaying the log during 3443 /* safety check so we don't flush while we are replaying the log during
3435 * mount 3444 * mount
3436 */ 3445 */
3437 if (list_empty(&journal->j_journal_list)) { 3446 if (list_empty(&journal->j_journal_list)) {
3438 return 0; 3447 return 0;
3439 } 3448 }
3440 3449
3441 /* check the current transaction. If there are no writers, and it is 3450 /* check the current transaction. If there are no writers, and it is
3442 * too old, finish it, and force the commit blocks to disk 3451 * too old, finish it, and force the commit blocks to disk
3443 */ 3452 */
3444 if (atomic_read(&journal->j_wcount) <= 0 && 3453 if (atomic_read(&journal->j_wcount) <= 0 &&
3445 journal->j_trans_start_time > 0 && 3454 journal->j_trans_start_time > 0 &&
3446 journal->j_len > 0 && 3455 journal->j_len > 0 &&
3447 (now - journal->j_trans_start_time) > journal->j_max_trans_age) { 3456 (now - journal->j_trans_start_time) > journal->j_max_trans_age) {
3448 if (!journal_join(&th, p_s_sb, 1)) { 3457 if (!journal_join(&th, p_s_sb, 1)) {
3449 reiserfs_prepare_for_journal(p_s_sb, 3458 reiserfs_prepare_for_journal(p_s_sb,
3450 SB_BUFFER_WITH_SB(p_s_sb), 3459 SB_BUFFER_WITH_SB(p_s_sb),
3451 1); 3460 1);
3452 journal_mark_dirty(&th, p_s_sb, 3461 journal_mark_dirty(&th, p_s_sb,
3453 SB_BUFFER_WITH_SB(p_s_sb)); 3462 SB_BUFFER_WITH_SB(p_s_sb));
3454 3463
3455 /* we're only being called from kreiserfsd; it makes no sense to do 3464 /* we're only being called from kreiserfsd; it makes no sense to do
3456 ** an async commit so that kreiserfsd can do it later 3465 ** an async commit so that kreiserfsd can do it later
3457 */ 3466 */
3458 do_journal_end(&th, p_s_sb, 1, COMMIT_NOW | WAIT); 3467 do_journal_end(&th, p_s_sb, 1, COMMIT_NOW | WAIT);
3459 } 3468 }
3460 } 3469 }
3461 return p_s_sb->s_dirt; 3470 return p_s_sb->s_dirt;
3462 } 3471 }
3463 3472
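The "too old" test reduces to clock arithmetic: commit once the idle transaction's age exceeds j_max_trans_age. A sketch with an illustrative 30-second limit:

#include <stdio.h>
#include <time.h>

int main(void)
{
	time_t now = time(NULL);
	time_t trans_start = now - 45;	/* pretend the trans began 45s ago */
	int max_trans_age = 30;		/* illustrative; the real value is per-journal */
	printf("flush needed: %d\n", (now - trans_start) > max_trans_age);
	return 0;
}
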
3464 /* 3473 /*
3465 ** returns 0 if do_journal_end should return right away, returns 1 if do_journal_end should finish the commit 3474 ** returns 0 if do_journal_end should return right away, returns 1 if do_journal_end should finish the commit
3466 ** 3475 **
3467 ** if the current transaction is too old, but still has writers, this will wait on j_join_wait until all 3476 ** if the current transaction is too old, but still has writers, this will wait on j_join_wait until all
3468 ** the writers are done. By the time it wakes up, the transaction it was called with has already ended, so it just 3477 ** the writers are done. By the time it wakes up, the transaction it was called with has already ended, so it just
3469 ** flushes the commit list and returns 0. 3478 ** flushes the commit list and returns 0.
3470 ** 3479 **
3471 ** Won't batch when flush or commit_now is set. Also won't batch when others are waiting on j_join_wait. 3480 ** Won't batch when flush or commit_now is set. Also won't batch when others are waiting on j_join_wait.
3472 ** 3481 **
3473 ** Note, we can't allow the journal_end to proceed while there are still writers in the log. 3482 ** Note, we can't allow the journal_end to proceed while there are still writers in the log.
3474 */ 3483 */
3475 static int check_journal_end(struct reiserfs_transaction_handle *th, 3484 static int check_journal_end(struct reiserfs_transaction_handle *th,
3476 struct super_block *p_s_sb, unsigned long nblocks, 3485 struct super_block *p_s_sb, unsigned long nblocks,
3477 int flags) 3486 int flags)
3478 { 3487 {
3479 3488
3480 time_t now; 3489 time_t now;
3481 int flush = flags & FLUSH_ALL; 3490 int flush = flags & FLUSH_ALL;
3482 int commit_now = flags & COMMIT_NOW; 3491 int commit_now = flags & COMMIT_NOW;
3483 int wait_on_commit = flags & WAIT; 3492 int wait_on_commit = flags & WAIT;
3484 struct reiserfs_journal_list *jl; 3493 struct reiserfs_journal_list *jl;
3485 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 3494 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
3486 3495
3487 BUG_ON(!th->t_trans_id); 3496 BUG_ON(!th->t_trans_id);
3488 3497
3489 if (th->t_trans_id != journal->j_trans_id) { 3498 if (th->t_trans_id != journal->j_trans_id) {
3490 reiserfs_panic(th->t_super, 3499 reiserfs_panic(th->t_super,
3491 "journal-1577: handle trans id %ld != current trans id %ld\n", 3500 "journal-1577: handle trans id %ld != current trans id %ld\n",
3492 th->t_trans_id, journal->j_trans_id); 3501 th->t_trans_id, journal->j_trans_id);
3493 } 3502 }
3494 3503
3495 journal->j_len_alloc -= (th->t_blocks_allocated - th->t_blocks_logged); 3504 journal->j_len_alloc -= (th->t_blocks_allocated - th->t_blocks_logged);
3496 if (atomic_read(&(journal->j_wcount)) > 0) { /* <= 0 is allowed. unmounting might not call begin */ 3505 if (atomic_read(&(journal->j_wcount)) > 0) { /* <= 0 is allowed. unmounting might not call begin */
3497 atomic_dec(&(journal->j_wcount)); 3506 atomic_dec(&(journal->j_wcount));
3498 } 3507 }
3499 3508
3500 /* BUG, deal with the case where j_len is 0, but blocks previously freed still need to be released; 3509 /* BUG, deal with the case where j_len is 0, but blocks previously freed still need to be released;
3501 ** this will be dealt with by the next transaction that actually writes something, but should be taken 3510 ** this will be dealt with by the next transaction that actually writes something, but should be taken
3502 ** care of in this trans 3511 ** care of in this trans
3503 */ 3512 */
3504 if (journal->j_len == 0) { 3513 if (journal->j_len == 0) {
3505 BUG(); 3514 BUG();
3506 } 3515 }
3507 /* if wcount > 0, and we are called with flush or commit_now, 3516 /* if wcount > 0, and we are called with flush or commit_now,
3508 ** we wait on j_join_wait. We will wake up when the last writer has 3517 ** we wait on j_join_wait. We will wake up when the last writer has
3509 ** finished the transaction, and started it on its way to the disk. 3518 ** finished the transaction, and started it on its way to the disk.
3510 ** Then, we flush the commit or journal list, and just return 0 3519 ** Then, we flush the commit or journal list, and just return 0
3511 ** because the rest of journal end was already done for this transaction. 3520 ** because the rest of journal end was already done for this transaction.
3512 */ 3521 */
3513 if (atomic_read(&(journal->j_wcount)) > 0) { 3522 if (atomic_read(&(journal->j_wcount)) > 0) {
3514 if (flush || commit_now) { 3523 if (flush || commit_now) {
3515 unsigned trans_id; 3524 unsigned trans_id;
3516 3525
3517 jl = journal->j_current_jl; 3526 jl = journal->j_current_jl;
3518 trans_id = jl->j_trans_id; 3527 trans_id = jl->j_trans_id;
3519 if (wait_on_commit) 3528 if (wait_on_commit)
3520 jl->j_state |= LIST_COMMIT_PENDING; 3529 jl->j_state |= LIST_COMMIT_PENDING;
3521 atomic_set(&(journal->j_jlock), 1); 3530 atomic_set(&(journal->j_jlock), 1);
3522 if (flush) { 3531 if (flush) {
3523 journal->j_next_full_flush = 1; 3532 journal->j_next_full_flush = 1;
3524 } 3533 }
3525 unlock_journal(p_s_sb); 3534 unlock_journal(p_s_sb);
3526 3535
3527 /* sleep while the current transaction is still j_jlocked */ 3536 /* sleep while the current transaction is still j_jlocked */
3528 while (journal->j_trans_id == trans_id) { 3537 while (journal->j_trans_id == trans_id) {
3529 if (atomic_read(&journal->j_jlock)) { 3538 if (atomic_read(&journal->j_jlock)) {
3530 queue_log_writer(p_s_sb); 3539 queue_log_writer(p_s_sb);
3531 } else { 3540 } else {
3532 lock_journal(p_s_sb); 3541 lock_journal(p_s_sb);
3533 if (journal->j_trans_id == trans_id) { 3542 if (journal->j_trans_id == trans_id) {
3534 atomic_set(&(journal->j_jlock), 3543 atomic_set(&(journal->j_jlock),
3535 1); 3544 1);
3536 } 3545 }
3537 unlock_journal(p_s_sb); 3546 unlock_journal(p_s_sb);
3538 } 3547 }
3539 } 3548 }
3540 if (journal->j_trans_id == trans_id) { 3549 if (journal->j_trans_id == trans_id) {
3541 BUG(); 3550 BUG();
3542 } 3551 }
3543 if (commit_now 3552 if (commit_now
3544 && journal_list_still_alive(p_s_sb, trans_id) 3553 && journal_list_still_alive(p_s_sb, trans_id)
3545 && wait_on_commit) { 3554 && wait_on_commit) {
3546 flush_commit_list(p_s_sb, jl, 1); 3555 flush_commit_list(p_s_sb, jl, 1);
3547 } 3556 }
3548 return 0; 3557 return 0;
3549 } 3558 }
3550 unlock_journal(p_s_sb); 3559 unlock_journal(p_s_sb);
3551 return 0; 3560 return 0;
3552 } 3561 }
3553 3562
3554 /* deal with old transactions where we are the last writers */ 3563 /* deal with old transactions where we are the last writers */
3555 now = get_seconds(); 3564 now = get_seconds();
3556 if ((now - journal->j_trans_start_time) > journal->j_max_trans_age) { 3565 if ((now - journal->j_trans_start_time) > journal->j_max_trans_age) {
3557 commit_now = 1; 3566 commit_now = 1;
3558 journal->j_next_async_flush = 1; 3567 journal->j_next_async_flush = 1;
3559 } 3568 }
3560 /* don't batch when someone is waiting on j_join_wait */ 3569 /* don't batch when someone is waiting on j_join_wait */
3561 /* don't batch when syncing the commit or flushing the whole trans */ 3570 /* don't batch when syncing the commit or flushing the whole trans */
3562 if (!(journal->j_must_wait > 0) && !(atomic_read(&(journal->j_jlock))) 3571 if (!(journal->j_must_wait > 0) && !(atomic_read(&(journal->j_jlock)))
3563 && !flush && !commit_now && (journal->j_len < journal->j_max_batch) 3572 && !flush && !commit_now && (journal->j_len < journal->j_max_batch)
3564 && journal->j_len_alloc < journal->j_max_batch 3573 && journal->j_len_alloc < journal->j_max_batch
3565 && journal->j_cnode_free > (journal->j_trans_max * 3)) { 3574 && journal->j_cnode_free > (journal->j_trans_max * 3)) {
3566 journal->j_bcount++; 3575 journal->j_bcount++;
3567 unlock_journal(p_s_sb); 3576 unlock_journal(p_s_sb);
3568 return 0; 3577 return 0;
3569 } 3578 }
3570 3579
3571 if (journal->j_start > SB_ONDISK_JOURNAL_SIZE(p_s_sb)) { 3580 if (journal->j_start > SB_ONDISK_JOURNAL_SIZE(p_s_sb)) {
3572 reiserfs_panic(p_s_sb, 3581 reiserfs_panic(p_s_sb,
3573 "journal-003: journal_end: j_start (%ld) is too high\n", 3582 "journal-003: journal_end: j_start (%ld) is too high\n",
3574 journal->j_start); 3583 journal->j_start);
3575 } 3584 }
3576 return 1; 3585 return 1;
3577 } 3586 }
3578 3587
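The batching test above condenses to a single predicate: a handle may simply return, leaving the commit to a later journal_end, only when nothing forces a flush and the journal still has room. A sketch with stand-in field names and illustrative numbers:

#include <stdbool.h>
#include <stdio.h>

struct jstate {
	int must_wait, jlock, flush, commit_now;
	unsigned len, len_alloc, max_batch, cnode_free, trans_max;
};

static bool can_batch(const struct jstate *j)
{
	return !j->must_wait && !j->jlock &&
	       !j->flush && !j->commit_now &&
	       j->len < j->max_batch &&
	       j->len_alloc < j->max_batch &&
	       j->cnode_free > j->trans_max * 3;
}

int main(void)
{
	struct jstate j = { 0, 0, 0, 0, 10, 12, 900, 4000, 1024 };
	printf("batch: %d\n", can_batch(&j));	/* 1: the commit is deferred */
	return 0;
}
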
3579 /* 3588 /*
3580 ** Does all the work that makes deleting blocks safe. 3589 ** Does all the work that makes deleting blocks safe.
3581 ** when deleting a block marked BH_JNew, just remove it from the current transaction, clean its buffer_head and move on. 3590 ** when deleting a block marked BH_JNew, just remove it from the current transaction, clean its buffer_head and move on.
3582 ** 3591 **
3583 ** otherwise: 3592 ** otherwise:
3584 ** set a bit for the block in the journal bitmap. That will prevent it from being allocated for unformatted nodes 3593 ** set a bit for the block in the journal bitmap. That will prevent it from being allocated for unformatted nodes
3585 ** before this transaction has finished. 3594 ** before this transaction has finished.
3586 ** 3595 **
3587 ** mark any cnodes for this block as BLOCK_FREED, and clear their bh pointers. That will prevent any old transactions with 3596 ** mark any cnodes for this block as BLOCK_FREED, and clear their bh pointers. That will prevent any old transactions with
3588 ** this block from trying to flush to the real location. Since we aren't removing the cnode from the journal_list_hash, 3597 ** this block from trying to flush to the real location. Since we aren't removing the cnode from the journal_list_hash,
3589 ** the block can't be reallocated yet. 3598 ** the block can't be reallocated yet.
3590 ** 3599 **
3591 ** Then remove it from the current transaction, decrementing any counters and filing it on the clean list. 3600 ** Then remove it from the current transaction, decrementing any counters and filing it on the clean list.
3592 */ 3601 */
3593 int journal_mark_freed(struct reiserfs_transaction_handle *th, 3602 int journal_mark_freed(struct reiserfs_transaction_handle *th,
3594 struct super_block *p_s_sb, b_blocknr_t blocknr) 3603 struct super_block *p_s_sb, b_blocknr_t blocknr)
3595 { 3604 {
3596 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 3605 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
3597 struct reiserfs_journal_cnode *cn = NULL; 3606 struct reiserfs_journal_cnode *cn = NULL;
3598 struct buffer_head *bh = NULL; 3607 struct buffer_head *bh = NULL;
3599 struct reiserfs_list_bitmap *jb = NULL; 3608 struct reiserfs_list_bitmap *jb = NULL;
3600 int cleaned = 0; 3609 int cleaned = 0;
3601 BUG_ON(!th->t_trans_id); 3610 BUG_ON(!th->t_trans_id);
3602 3611
3603 cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, blocknr); 3612 cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, blocknr);
3604 if (cn && cn->bh) { 3613 if (cn && cn->bh) {
3605 bh = cn->bh; 3614 bh = cn->bh;
3606 get_bh(bh); 3615 get_bh(bh);
3607 } 3616 }
3608 /* if it is journal new, we just remove it from this transaction */ 3617 /* if it is journal new, we just remove it from this transaction */
3609 if (bh && buffer_journal_new(bh)) { 3618 if (bh && buffer_journal_new(bh)) {
3610 clear_buffer_journal_new(bh); 3619 clear_buffer_journal_new(bh);
3611 clear_prepared_bits(bh); 3620 clear_prepared_bits(bh);
3612 reiserfs_clean_and_file_buffer(bh); 3621 reiserfs_clean_and_file_buffer(bh);
3613 cleaned = remove_from_transaction(p_s_sb, blocknr, cleaned); 3622 cleaned = remove_from_transaction(p_s_sb, blocknr, cleaned);
3614 } else { 3623 } else {
3615 /* set the bit for this block in the journal bitmap for this transaction */ 3624 /* set the bit for this block in the journal bitmap for this transaction */
3616 jb = journal->j_current_jl->j_list_bitmap; 3625 jb = journal->j_current_jl->j_list_bitmap;
3617 if (!jb) { 3626 if (!jb) {
3618 reiserfs_panic(p_s_sb, 3627 reiserfs_panic(p_s_sb,
3619 "journal-1702: journal_mark_freed, journal_list_bitmap is NULL\n"); 3628 "journal-1702: journal_mark_freed, journal_list_bitmap is NULL\n");
3620 } 3629 }
3621 set_bit_in_list_bitmap(p_s_sb, blocknr, jb); 3630 set_bit_in_list_bitmap(p_s_sb, blocknr, jb);
3622 3631
3623 /* Note, the entire while loop is not allowed to schedule. */ 3632 /* Note, the entire while loop is not allowed to schedule. */
3624 3633
3625 if (bh) { 3634 if (bh) {
3626 clear_prepared_bits(bh); 3635 clear_prepared_bits(bh);
3627 reiserfs_clean_and_file_buffer(bh); 3636 reiserfs_clean_and_file_buffer(bh);
3628 } 3637 }
3629 cleaned = remove_from_transaction(p_s_sb, blocknr, cleaned); 3638 cleaned = remove_from_transaction(p_s_sb, blocknr, cleaned);
3630 3639
3631 /* find all older transactions with this block, make sure they don't try to write it out */ 3640 /* find all older transactions with this block, make sure they don't try to write it out */
3632 cn = get_journal_hash_dev(p_s_sb, journal->j_list_hash_table, 3641 cn = get_journal_hash_dev(p_s_sb, journal->j_list_hash_table,
3633 blocknr); 3642 blocknr);
3634 while (cn) { 3643 while (cn) {
3635 if (p_s_sb == cn->sb && blocknr == cn->blocknr) { 3644 if (p_s_sb == cn->sb && blocknr == cn->blocknr) {
3636 set_bit(BLOCK_FREED, &cn->state); 3645 set_bit(BLOCK_FREED, &cn->state);
3637 if (cn->bh) { 3646 if (cn->bh) {
3638 if (!cleaned) { 3647 if (!cleaned) {
3639 /* remove_from_transaction will brelse the buffer if it was 3648 /* remove_from_transaction will brelse the buffer if it was
3640 ** in the current trans 3649 ** in the current trans
3641 */ 3650 */
3642 clear_buffer_journal_dirty(cn-> 3651 clear_buffer_journal_dirty(cn->
3643 bh); 3652 bh);
3644 clear_buffer_dirty(cn->bh); 3653 clear_buffer_dirty(cn->bh);
3645 clear_buffer_journal_test(cn-> 3654 clear_buffer_journal_test(cn->
3646 bh); 3655 bh);
3647 cleaned = 1; 3656 cleaned = 1;
3648 put_bh(cn->bh); 3657 put_bh(cn->bh);
3649 if (atomic_read 3658 if (atomic_read
3650 (&(cn->bh->b_count)) < 0) { 3659 (&(cn->bh->b_count)) < 0) {
3651 reiserfs_warning(p_s_sb, 3660 reiserfs_warning(p_s_sb,
3652 "journal-2138: cn->bh->b_count < 0"); 3661 "journal-2138: cn->bh->b_count < 0");
3653 } 3662 }
3654 } 3663 }
3655 if (cn->jlist) { /* since we are clearing the bh, we MUST dec nonzerolen */ 3664 if (cn->jlist) { /* since we are clearing the bh, we MUST dec nonzerolen */
3656 atomic_dec(& 3665 atomic_dec(&
3657 (cn->jlist-> 3666 (cn->jlist->
3658 j_nonzerolen)); 3667 j_nonzerolen));
3659 } 3668 }
3660 cn->bh = NULL; 3669 cn->bh = NULL;
3661 } 3670 }
3662 } 3671 }
3663 cn = cn->hnext; 3672 cn = cn->hnext;
3664 } 3673 }
3665 } 3674 }
3666 3675
3667 if (bh) { 3676 if (bh) {
3668 put_bh(bh); /* get_hash grabs the buffer */ 3677 put_bh(bh); /* get_hash grabs the buffer */
3669 if (atomic_read(&(bh->b_count)) < 0) { 3678 if (atomic_read(&(bh->b_count)) < 0) {
3670 reiserfs_warning(p_s_sb, 3679 reiserfs_warning(p_s_sb,
3671 "journal-2165: bh->b_count < 0"); 3680 "journal-2165: bh->b_count < 0");
3672 } 3681 }
3673 } 3682 }
3674 return 0; 3683 return 0;
3675 } 3684 }
3676 3685
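The set_bit_in_list_bitmap step above is the piece that keeps a freed block from being reallocated before the transaction finishes. A miniature user-space bitmap showing the same bit arithmetic (the helpers here are illustrative, not the kernel's):

#include <stdio.h>

#define BITS_PER_WORD (8 * sizeof(unsigned long))

static unsigned long bitmap[4];	/* covers 256 blocks with 64-bit longs */

static void set_block_bit(unsigned long nr)
{
	bitmap[nr / BITS_PER_WORD] |= 1UL << (nr % BITS_PER_WORD);
}

static int test_block_bit(unsigned long nr)
{
	return !!(bitmap[nr / BITS_PER_WORD] & (1UL << (nr % BITS_PER_WORD)));
}

int main(void)
{
	set_block_bit(42);	/* block 42 freed in this transaction */
	printf("block 42 reserved: %d\n", test_block_bit(42));
	return 0;
}
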
3677 void reiserfs_update_inode_transaction(struct inode *inode) 3686 void reiserfs_update_inode_transaction(struct inode *inode)
3678 { 3687 {
3679 struct reiserfs_journal *journal = SB_JOURNAL(inode->i_sb); 3688 struct reiserfs_journal *journal = SB_JOURNAL(inode->i_sb);
3680 REISERFS_I(inode)->i_jl = journal->j_current_jl; 3689 REISERFS_I(inode)->i_jl = journal->j_current_jl;
3681 REISERFS_I(inode)->i_trans_id = journal->j_trans_id; 3690 REISERFS_I(inode)->i_trans_id = journal->j_trans_id;
3682 } 3691 }
3683 3692
3684 /* 3693 /*
3685 * returns -1 on error, 0 if no commits/barriers were done and 1 3694 * returns -1 on error, 0 if no commits/barriers were done and 1
3686 * if a transaction was actually committed and the barrier was done 3695 * if a transaction was actually committed and the barrier was done
3687 */ 3696 */
3688 static int __commit_trans_jl(struct inode *inode, unsigned long id, 3697 static int __commit_trans_jl(struct inode *inode, unsigned long id,
3689 struct reiserfs_journal_list *jl) 3698 struct reiserfs_journal_list *jl)
3690 { 3699 {
3691 struct reiserfs_transaction_handle th; 3700 struct reiserfs_transaction_handle th;
3692 struct super_block *sb = inode->i_sb; 3701 struct super_block *sb = inode->i_sb;
3693 struct reiserfs_journal *journal = SB_JOURNAL(sb); 3702 struct reiserfs_journal *journal = SB_JOURNAL(sb);
3694 int ret = 0; 3703 int ret = 0;
3695 3704
3696 /* is it from the current transaction, or from an unknown transaction? */ 3705 /* is it from the current transaction, or from an unknown transaction? */
3697 if (id == journal->j_trans_id) { 3706 if (id == journal->j_trans_id) {
3698 jl = journal->j_current_jl; 3707 jl = journal->j_current_jl;
3699 /* try to let other writers come in and grow this transaction */ 3708 /* try to let other writers come in and grow this transaction */
3700 let_transaction_grow(sb, id); 3709 let_transaction_grow(sb, id);
3701 if (journal->j_trans_id != id) { 3710 if (journal->j_trans_id != id) {
3702 goto flush_commit_only; 3711 goto flush_commit_only;
3703 } 3712 }
3704 3713
3705 ret = journal_begin(&th, sb, 1); 3714 ret = journal_begin(&th, sb, 1);
3706 if (ret) 3715 if (ret)
3707 return ret; 3716 return ret;
3708 3717
3709 /* someone might have ended this transaction while we joined */ 3718 /* someone might have ended this transaction while we joined */
3710 if (journal->j_trans_id != id) { 3719 if (journal->j_trans_id != id) {
3711 reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb), 3720 reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb),
3712 1); 3721 1);
3713 journal_mark_dirty(&th, sb, SB_BUFFER_WITH_SB(sb)); 3722 journal_mark_dirty(&th, sb, SB_BUFFER_WITH_SB(sb));
3714 ret = journal_end(&th, sb, 1); 3723 ret = journal_end(&th, sb, 1);
3715 goto flush_commit_only; 3724 goto flush_commit_only;
3716 } 3725 }
3717 3726
3718 ret = journal_end_sync(&th, sb, 1); 3727 ret = journal_end_sync(&th, sb, 1);
3719 if (!ret) 3728 if (!ret)
3720 ret = 1; 3729 ret = 1;
3721 3730
3722 } else { 3731 } else {
3723 /* this gets tricky; we have to make sure the journal list in 3732 /* this gets tricky; we have to make sure the journal list in
3724 * the inode still exists. We know the list is still around 3733 * the inode still exists. We know the list is still around
3725 * if we've got a larger transaction id than the oldest list 3734 * if we've got a larger transaction id than the oldest list
3726 */ 3735 */
3727 flush_commit_only: 3736 flush_commit_only:
3728 if (journal_list_still_alive(inode->i_sb, id)) { 3737 if (journal_list_still_alive(inode->i_sb, id)) {
3729 /* 3738 /*
3730 * we only set ret to 1 when we know for sure 3739 * we only set ret to 1 when we know for sure
3731 * the barrier hasn't been started yet on the commit 3740 * the barrier hasn't been started yet on the commit
3732 * block. 3741 * block.
3733 */ 3742 */
3734 if (atomic_read(&jl->j_commit_left) > 1) 3743 if (atomic_read(&jl->j_commit_left) > 1)
3735 ret = 1; 3744 ret = 1;
3736 flush_commit_list(sb, jl, 1); 3745 flush_commit_list(sb, jl, 1);
3737 if (journal->j_errno) 3746 if (journal->j_errno)
3738 ret = journal->j_errno; 3747 ret = journal->j_errno;
3739 } 3748 }
3740 } 3749 }
3741 /* otherwise the list is gone, and long since committed */ 3750 /* otherwise the list is gone, and long since committed */
3742 return ret; 3751 return ret;
3743 } 3752 }
3744 3753
3745 int reiserfs_commit_for_inode(struct inode *inode) 3754 int reiserfs_commit_for_inode(struct inode *inode)
3746 { 3755 {
3747 unsigned long id = REISERFS_I(inode)->i_trans_id; 3756 unsigned long id = REISERFS_I(inode)->i_trans_id;
3748 struct reiserfs_journal_list *jl = REISERFS_I(inode)->i_jl; 3757 struct reiserfs_journal_list *jl = REISERFS_I(inode)->i_jl;
3749 3758
3750 /* for the whole inode, assume unset id means it was 3759 /* for the whole inode, assume unset id means it was
3751 * changed in the current transaction, which is more conservative 3760 * changed in the current transaction, which is more conservative
3752 */ 3761 */
3753 if (!id || !jl) { 3762 if (!id || !jl) {
3754 reiserfs_update_inode_transaction(inode); 3763 reiserfs_update_inode_transaction(inode);
3755 id = REISERFS_I(inode)->i_trans_id; 3764 id = REISERFS_I(inode)->i_trans_id;
3756 /* jl will be updated in __commit_trans_jl */ 3765 /* jl will be updated in __commit_trans_jl */
3757 } 3766 }
3758 3767
3759 return __commit_trans_jl(inode, id, jl); 3768 return __commit_trans_jl(inode, id, jl);
3760 } 3769 }
3761 3770
3762 void reiserfs_restore_prepared_buffer(struct super_block *p_s_sb, 3771 void reiserfs_restore_prepared_buffer(struct super_block *p_s_sb,
3763 struct buffer_head *bh) 3772 struct buffer_head *bh)
3764 { 3773 {
3765 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 3774 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
3766 PROC_INFO_INC(p_s_sb, journal.restore_prepared); 3775 PROC_INFO_INC(p_s_sb, journal.restore_prepared);
3767 if (!bh) { 3776 if (!bh) {
3768 return; 3777 return;
3769 } 3778 }
3770 if (test_clear_buffer_journal_restore_dirty(bh) && 3779 if (test_clear_buffer_journal_restore_dirty(bh) &&
3771 buffer_journal_dirty(bh)) { 3780 buffer_journal_dirty(bh)) {
3772 struct reiserfs_journal_cnode *cn; 3781 struct reiserfs_journal_cnode *cn;
3773 cn = get_journal_hash_dev(p_s_sb, 3782 cn = get_journal_hash_dev(p_s_sb,
3774 journal->j_list_hash_table, 3783 journal->j_list_hash_table,
3775 bh->b_blocknr); 3784 bh->b_blocknr);
3776 if (cn && can_dirty(cn)) { 3785 if (cn && can_dirty(cn)) {
3777 set_buffer_journal_test(bh); 3786 set_buffer_journal_test(bh);
3778 mark_buffer_dirty(bh); 3787 mark_buffer_dirty(bh);
3779 } 3788 }
3780 } 3789 }
3781 clear_buffer_journal_prepared(bh); 3790 clear_buffer_journal_prepared(bh);
3782 } 3791 }
3783 3792
3784 extern struct tree_balance *cur_tb; 3793 extern struct tree_balance *cur_tb;
3785 /* 3794 /*
3786 ** before we can change a metadata block, we have to make sure it won't 3795 ** before we can change a metadata block, we have to make sure it won't
3787 ** be written to disk while we are altering it. So, we must: 3796 ** be written to disk while we are altering it. So, we must:
3788 ** clean it 3797 ** clean it
3789 ** wait on it. 3798 ** wait on it.
3790 ** 3799 **
3791 */ 3800 */
3792 int reiserfs_prepare_for_journal(struct super_block *p_s_sb, 3801 int reiserfs_prepare_for_journal(struct super_block *p_s_sb,
3793 struct buffer_head *bh, int wait) 3802 struct buffer_head *bh, int wait)
3794 { 3803 {
3795 PROC_INFO_INC(p_s_sb, journal.prepare); 3804 PROC_INFO_INC(p_s_sb, journal.prepare);
3796 3805
3797 if (test_set_buffer_locked(bh)) { 3806 if (test_set_buffer_locked(bh)) {
3798 if (!wait) 3807 if (!wait)
3799 return 0; 3808 return 0;
3800 lock_buffer(bh); 3809 lock_buffer(bh);
3801 } 3810 }
3802 set_buffer_journal_prepared(bh); 3811 set_buffer_journal_prepared(bh);
3803 if (test_clear_buffer_dirty(bh) && buffer_journal_dirty(bh)) { 3812 if (test_clear_buffer_dirty(bh) && buffer_journal_dirty(bh)) {
3804 clear_buffer_journal_test(bh); 3813 clear_buffer_journal_test(bh);
3805 set_buffer_journal_restore_dirty(bh); 3814 set_buffer_journal_restore_dirty(bh);
3806 } 3815 }
3807 unlock_buffer(bh); 3816 unlock_buffer(bh);
3808 return 1; 3817 return 1;
3809 } 3818 }
3810 3819
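The locking in reiserfs_prepare_for_journal is the classic try-then-block shape: grab the buffer lock opportunistically, and sleep for it only when the caller asked to wait. The same shape modeled with a pthread mutex:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static int prepare(int wait)
{
	if (pthread_mutex_trylock(&lock) != 0) {	/* already held */
		if (!wait)
			return 0;	/* caller chose not to block */
		pthread_mutex_lock(&lock);	/* block until available */
	}
	/* ...flag manipulation would happen here, under the lock... */
	pthread_mutex_unlock(&lock);
	return 1;
}

int main(void)
{
	printf("%d\n", prepare(0));	/* 1: uncontended, prepared */
	return 0;
}
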
3811 static void flush_old_journal_lists(struct super_block *s) 3820 static void flush_old_journal_lists(struct super_block *s)
3812 { 3821 {
3813 struct reiserfs_journal *journal = SB_JOURNAL(s); 3822 struct reiserfs_journal *journal = SB_JOURNAL(s);
3814 struct reiserfs_journal_list *jl; 3823 struct reiserfs_journal_list *jl;
3815 struct list_head *entry; 3824 struct list_head *entry;
3816 time_t now = get_seconds(); 3825 time_t now = get_seconds();
3817 3826
3818 while (!list_empty(&journal->j_journal_list)) { 3827 while (!list_empty(&journal->j_journal_list)) {
3819 entry = journal->j_journal_list.next; 3828 entry = journal->j_journal_list.next;
3820 jl = JOURNAL_LIST_ENTRY(entry); 3829 jl = JOURNAL_LIST_ENTRY(entry);
3821 /* this check should always be run, to send old lists to disk */ 3830 /* this check should always be run, to send old lists to disk */
3822 if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4))) { 3831 if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4))) {
3823 flush_used_journal_lists(s, jl); 3832 flush_used_journal_lists(s, jl);
3824 } else { 3833 } else {
3825 break; 3834 break;
3826 } 3835 }
3827 } 3836 }
3828 } 3837 }
3829 3838
3830 /* 3839 /*
3831 ** long and ugly. If flush, will not return until all commit 3840 ** long and ugly. If flush, will not return until all commit
3832 ** blocks and all real buffers in the trans are on disk. 3841 ** blocks and all real buffers in the trans are on disk.
3833 ** If no_async, won't return until all commit blocks are on disk. 3842 ** If no_async, won't return until all commit blocks are on disk.
3834 ** 3843 **
3835 ** keep reading, there are comments as you go along 3844 ** keep reading, there are comments as you go along
3836 ** 3845 **
3837 ** If the journal is aborted, we just clean up. Things like flushing 3846 ** If the journal is aborted, we just clean up. Things like flushing
3838 ** journal lists, etc just won't happen. 3847 ** journal lists, etc just won't happen.
3839 */ 3848 */
3840 static int do_journal_end(struct reiserfs_transaction_handle *th, 3849 static int do_journal_end(struct reiserfs_transaction_handle *th,
3841 struct super_block *p_s_sb, unsigned long nblocks, 3850 struct super_block *p_s_sb, unsigned long nblocks,
3842 int flags) 3851 int flags)
3843 { 3852 {
3844 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 3853 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
3845 struct reiserfs_journal_cnode *cn, *next, *jl_cn; 3854 struct reiserfs_journal_cnode *cn, *next, *jl_cn;
3846 struct reiserfs_journal_cnode *last_cn = NULL; 3855 struct reiserfs_journal_cnode *last_cn = NULL;
3847 struct reiserfs_journal_desc *desc; 3856 struct reiserfs_journal_desc *desc;
3848 struct reiserfs_journal_commit *commit; 3857 struct reiserfs_journal_commit *commit;
3849 struct buffer_head *c_bh; /* commit bh */ 3858 struct buffer_head *c_bh; /* commit bh */
3850 struct buffer_head *d_bh; /* desc bh */ 3859 struct buffer_head *d_bh; /* desc bh */
3851 int cur_write_start = 0; /* start index of current log write */ 3860 int cur_write_start = 0; /* start index of current log write */
3852 int old_start; 3861 int old_start;
3853 int i; 3862 int i;
3854 int flush = flags & FLUSH_ALL; 3863 int flush = flags & FLUSH_ALL;
3855 int wait_on_commit = flags & WAIT; 3864 int wait_on_commit = flags & WAIT;
3856 struct reiserfs_journal_list *jl, *temp_jl; 3865 struct reiserfs_journal_list *jl, *temp_jl;
3857 struct list_head *entry, *safe; 3866 struct list_head *entry, *safe;
3858 unsigned long jindex; 3867 unsigned long jindex;
3859 unsigned long commit_trans_id; 3868 unsigned long commit_trans_id;
3860 int trans_half; 3869 int trans_half;
3861 3870
3862 BUG_ON(th->t_refcount > 1); 3871 BUG_ON(th->t_refcount > 1);
3863 BUG_ON(!th->t_trans_id); 3872 BUG_ON(!th->t_trans_id);
3864 3873
3865 put_fs_excl(); 3874 put_fs_excl();
3866 current->journal_info = th->t_handle_save; 3875 current->journal_info = th->t_handle_save;
3867 reiserfs_check_lock_depth(p_s_sb, "journal end"); 3876 reiserfs_check_lock_depth(p_s_sb, "journal end");
3868 if (journal->j_len == 0) { 3877 if (journal->j_len == 0) {
3869 reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 3878 reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb),
3870 1); 3879 1);
3871 journal_mark_dirty(th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)); 3880 journal_mark_dirty(th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb));
3872 } 3881 }
3873 3882
3874 lock_journal(p_s_sb); 3883 lock_journal(p_s_sb);
3875 if (journal->j_next_full_flush) { 3884 if (journal->j_next_full_flush) {
3876 flags |= FLUSH_ALL; 3885 flags |= FLUSH_ALL;
3877 flush = 1; 3886 flush = 1;
3878 } 3887 }
3879 if (journal->j_next_async_flush) { 3888 if (journal->j_next_async_flush) {
3880 flags |= COMMIT_NOW | WAIT; 3889 flags |= COMMIT_NOW | WAIT;
3881 wait_on_commit = 1; 3890 wait_on_commit = 1;
3882 } 3891 }
3883 3892
3884 /* check_journal_end is called with the journal locked, and unlocks it if it does not return 1. 3893 /* check_journal_end is called with the journal locked, and unlocks it if it does not return 1.
3885 ** It tells us if we should continue with the journal_end, or just return 3894 ** It tells us if we should continue with the journal_end, or just return
3886 */ 3895 */
3887 if (!check_journal_end(th, p_s_sb, nblocks, flags)) { 3896 if (!check_journal_end(th, p_s_sb, nblocks, flags)) {
3888 p_s_sb->s_dirt = 1; 3897 p_s_sb->s_dirt = 1;
3889 wake_queued_writers(p_s_sb); 3898 wake_queued_writers(p_s_sb);
3890 reiserfs_async_progress_wait(p_s_sb); 3899 reiserfs_async_progress_wait(p_s_sb);
3891 goto out; 3900 goto out;
3892 } 3901 }
3893 3902
3894 /* check_journal_end might set these, check again */ 3903 /* check_journal_end might set these, check again */
3895 if (journal->j_next_full_flush) { 3904 if (journal->j_next_full_flush) {
3896 flush = 1; 3905 flush = 1;
3897 } 3906 }
3898 3907
3899 /* 3908 /*
3900 ** j_must_wait means we have to flush the log blocks, and the real blocks for 3909 ** j_must_wait means we have to flush the log blocks, and the real blocks for
3901 ** this transaction 3910 ** this transaction
3902 */ 3911 */
3903 if (journal->j_must_wait > 0) { 3912 if (journal->j_must_wait > 0) {
3904 flush = 1; 3913 flush = 1;
3905 } 3914 }
3906 #ifdef REISERFS_PREALLOCATE 3915 #ifdef REISERFS_PREALLOCATE
3907 /* quota ops might need to nest, setup the journal_info pointer for them 3916 /* quota ops might need to nest, setup the journal_info pointer for them
3908 * and raise the refcount so that it is > 0. */ 3917 * and raise the refcount so that it is > 0. */
3909 current->journal_info = th; 3918 current->journal_info = th;
3910 th->t_refcount++; 3919 th->t_refcount++;
3911 reiserfs_discard_all_prealloc(th); /* it should not involve new blocks into 3920 reiserfs_discard_all_prealloc(th); /* it should not involve new blocks into
3912 * the transaction */ 3921 * the transaction */
3913 th->t_refcount--; 3922 th->t_refcount--;
3914 current->journal_info = th->t_handle_save; 3923 current->journal_info = th->t_handle_save;
3915 #endif 3924 #endif
3916 3925
3917 /* setup description block */ 3926 /* setup description block */
3918 d_bh = 3927 d_bh =
3919 journal_getblk(p_s_sb, 3928 journal_getblk(p_s_sb,
3920 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 3929 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
3921 journal->j_start); 3930 journal->j_start);
3922 set_buffer_uptodate(d_bh); 3931 set_buffer_uptodate(d_bh);
3923 desc = (struct reiserfs_journal_desc *)(d_bh)->b_data; 3932 desc = (struct reiserfs_journal_desc *)(d_bh)->b_data;
3924 memset(d_bh->b_data, 0, d_bh->b_size); 3933 memset(d_bh->b_data, 0, d_bh->b_size);
3925 memcpy(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8); 3934 memcpy(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8);
3926 set_desc_trans_id(desc, journal->j_trans_id); 3935 set_desc_trans_id(desc, journal->j_trans_id);
3927 3936
3928 /* setup commit block. Don't write (keep it clean too) this one until after everyone else is written */ 3937 /* setup commit block. Don't write (keep it clean too) this one until after everyone else is written */
3929 c_bh = journal_getblk(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 3938 c_bh = journal_getblk(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
3930 ((journal->j_start + journal->j_len + 3939 ((journal->j_start + journal->j_len +
3931 1) % SB_ONDISK_JOURNAL_SIZE(p_s_sb))); 3940 1) % SB_ONDISK_JOURNAL_SIZE(p_s_sb)));
3932 commit = (struct reiserfs_journal_commit *)c_bh->b_data; 3941 commit = (struct reiserfs_journal_commit *)c_bh->b_data;
3933 memset(c_bh->b_data, 0, c_bh->b_size); 3942 memset(c_bh->b_data, 0, c_bh->b_size);
3934 set_commit_trans_id(commit, journal->j_trans_id); 3943 set_commit_trans_id(commit, journal->j_trans_id);
3935 set_buffer_uptodate(c_bh); 3944 set_buffer_uptodate(c_bh);
3936 3945
3937 /* init this journal list */ 3946 /* init this journal list */
3938 jl = journal->j_current_jl; 3947 jl = journal->j_current_jl;
3939 3948
3940 /* we lock the commit before doing anything because 3949 /* we lock the commit before doing anything because
3941 * we want to make sure nobody tries to run flush_commit_list until 3950 * we want to make sure nobody tries to run flush_commit_list until
3942 * the new transaction is fully set up, and we've already flushed the 3951 * the new transaction is fully set up, and we've already flushed the
3943 * ordered bh list 3952 * ordered bh list
3944 */ 3953 */
3945 down(&jl->j_commit_lock); 3954 down(&jl->j_commit_lock);
3946 3955
3947 /* save the transaction id in case we need to commit it later */ 3956 /* save the transaction id in case we need to commit it later */
3948 commit_trans_id = jl->j_trans_id; 3957 commit_trans_id = jl->j_trans_id;
3949 3958
3950 atomic_set(&jl->j_older_commits_done, 0); 3959 atomic_set(&jl->j_older_commits_done, 0);
3951 jl->j_trans_id = journal->j_trans_id; 3960 jl->j_trans_id = journal->j_trans_id;
3952 jl->j_timestamp = journal->j_trans_start_time; 3961 jl->j_timestamp = journal->j_trans_start_time;
3953 jl->j_commit_bh = c_bh; 3962 jl->j_commit_bh = c_bh;
3954 jl->j_start = journal->j_start; 3963 jl->j_start = journal->j_start;
3955 jl->j_len = journal->j_len; 3964 jl->j_len = journal->j_len;
3956 atomic_set(&jl->j_nonzerolen, journal->j_len); 3965 atomic_set(&jl->j_nonzerolen, journal->j_len);
3957 atomic_set(&jl->j_commit_left, journal->j_len + 2); 3966 atomic_set(&jl->j_commit_left, journal->j_len + 2);
3958 jl->j_realblock = NULL; 3967 jl->j_realblock = NULL;
3959 3968
3960 /* The ENTIRE FOR LOOP MUST not cause schedule to occur. 3969 /* The ENTIRE FOR LOOP MUST not cause schedule to occur.
3961 ** for each real block, add it to the journal list hash, 3970 ** for each real block, add it to the journal list hash,
3962 ** copy into real block index array in the commit or desc block 3971 ** copy into real block index array in the commit or desc block
3963 */ 3972 */
3964 trans_half = journal_trans_half(p_s_sb->s_blocksize); 3973 trans_half = journal_trans_half(p_s_sb->s_blocksize);
3965 for (i = 0, cn = journal->j_first; cn; cn = cn->next, i++) { 3974 for (i = 0, cn = journal->j_first; cn; cn = cn->next, i++) {
3966 if (buffer_journaled(cn->bh)) { 3975 if (buffer_journaled(cn->bh)) {
3967 jl_cn = get_cnode(p_s_sb); 3976 jl_cn = get_cnode(p_s_sb);
3968 if (!jl_cn) { 3977 if (!jl_cn) {
3969 reiserfs_panic(p_s_sb, 3978 reiserfs_panic(p_s_sb,
3970 "journal-1676, get_cnode returned NULL\n"); 3979 "journal-1676, get_cnode returned NULL\n");
3971 } 3980 }
3972 if (i == 0) { 3981 if (i == 0) {
3973 jl->j_realblock = jl_cn; 3982 jl->j_realblock = jl_cn;
3974 } 3983 }
3975 jl_cn->prev = last_cn; 3984 jl_cn->prev = last_cn;
3976 jl_cn->next = NULL; 3985 jl_cn->next = NULL;
3977 if (last_cn) { 3986 if (last_cn) {
3978 last_cn->next = jl_cn; 3987 last_cn->next = jl_cn;
3979 } 3988 }
3980 last_cn = jl_cn; 3989 last_cn = jl_cn;
3981 /* make sure the block we are trying to log is not a block 3990 /* make sure the block we are trying to log is not a block
3982 of the journal or reserved area */ 3991 of the journal or reserved area */
3983 3992
3984 if (is_block_in_log_or_reserved_area 3993 if (is_block_in_log_or_reserved_area
3985 (p_s_sb, cn->bh->b_blocknr)) { 3994 (p_s_sb, cn->bh->b_blocknr)) {
3986 reiserfs_panic(p_s_sb, 3995 reiserfs_panic(p_s_sb,
3987 "journal-2332: Trying to log block %lu, which is a log block\n", 3996 "journal-2332: Trying to log block %lu, which is a log block\n",
3988 cn->bh->b_blocknr); 3997 cn->bh->b_blocknr);
3989 } 3998 }
3990 jl_cn->blocknr = cn->bh->b_blocknr; 3999 jl_cn->blocknr = cn->bh->b_blocknr;
3991 jl_cn->state = 0; 4000 jl_cn->state = 0;
3992 jl_cn->sb = p_s_sb; 4001 jl_cn->sb = p_s_sb;
3993 jl_cn->bh = cn->bh; 4002 jl_cn->bh = cn->bh;
3994 jl_cn->jlist = jl; 4003 jl_cn->jlist = jl;
3995 insert_journal_hash(journal->j_list_hash_table, jl_cn); 4004 insert_journal_hash(journal->j_list_hash_table, jl_cn);
3996 if (i < trans_half) { 4005 if (i < trans_half) {
3997 desc->j_realblock[i] = 4006 desc->j_realblock[i] =
3998 cpu_to_le32(cn->bh->b_blocknr); 4007 cpu_to_le32(cn->bh->b_blocknr);
3999 } else { 4008 } else {
4000 commit->j_realblock[i - trans_half] = 4009 commit->j_realblock[i - trans_half] =
4001 cpu_to_le32(cn->bh->b_blocknr); 4010 cpu_to_le32(cn->bh->b_blocknr);
4002 } 4011 }
4003 } else { 4012 } else {
4004 i--; 4013 i--;
4005 } 4014 }
4006 } 4015 }
4007 set_desc_trans_len(desc, journal->j_len); 4016 set_desc_trans_len(desc, journal->j_len);
4008 set_desc_mount_id(desc, journal->j_mount_id); 4017 set_desc_mount_id(desc, journal->j_mount_id);
4009 set_desc_trans_id(desc, journal->j_trans_id); 4018 set_desc_trans_id(desc, journal->j_trans_id);
4010 set_commit_trans_len(commit, journal->j_len); 4019 set_commit_trans_len(commit, journal->j_len);
4011 4020
4012 /* special check in case all buffers in the journal were marked for not logging */ 4021 /* special check in case all buffers in the journal were marked for not logging */
4013 if (journal->j_len == 0) { 4022 if (journal->j_len == 0) {
4014 BUG(); 4023 BUG();
4015 } 4024 }
4016 4025
4017 /* we're about to dirty all the log blocks, mark the description block 4026 /* we're about to dirty all the log blocks, mark the description block
4018 * dirty now too. Don't mark the commit block dirty until all the 4027 * dirty now too. Don't mark the commit block dirty until all the
4019 * others are on disk 4028 * others are on disk
4020 */ 4029 */
4021 mark_buffer_dirty(d_bh); 4030 mark_buffer_dirty(d_bh);
4022 4031
4023 /* first data block is j_start + 1, so add one to cur_write_start wherever you use it */ 4032 /* first data block is j_start + 1, so add one to cur_write_start wherever you use it */
4024 cur_write_start = journal->j_start; 4033 cur_write_start = journal->j_start;
4025 cn = journal->j_first; 4034 cn = journal->j_first;
4026 jindex = 1; /* start at one so we don't get the desc again */ 4035 jindex = 1; /* start at one so we don't get the desc again */
4027 while (cn) { 4036 while (cn) {
4028 clear_buffer_journal_new(cn->bh); 4037 clear_buffer_journal_new(cn->bh);
4029 /* copy all the real blocks into log area. dirty log blocks */ 4038 /* copy all the real blocks into log area. dirty log blocks */
4030 if (buffer_journaled(cn->bh)) { 4039 if (buffer_journaled(cn->bh)) {
4031 struct buffer_head *tmp_bh; 4040 struct buffer_head *tmp_bh;
4032 char *addr; 4041 char *addr;
4033 struct page *page; 4042 struct page *page;
4034 tmp_bh = 4043 tmp_bh =
4035 journal_getblk(p_s_sb, 4044 journal_getblk(p_s_sb,
4036 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 4045 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
4037 ((cur_write_start + 4046 ((cur_write_start +
4038 jindex) % 4047 jindex) %
4039 SB_ONDISK_JOURNAL_SIZE(p_s_sb))); 4048 SB_ONDISK_JOURNAL_SIZE(p_s_sb)));
4040 set_buffer_uptodate(tmp_bh); 4049 set_buffer_uptodate(tmp_bh);
4041 page = cn->bh->b_page; 4050 page = cn->bh->b_page;
4042 addr = kmap(page); 4051 addr = kmap(page);
4043 memcpy(tmp_bh->b_data, 4052 memcpy(tmp_bh->b_data,
4044 addr + offset_in_page(cn->bh->b_data), 4053 addr + offset_in_page(cn->bh->b_data),
4045 cn->bh->b_size); 4054 cn->bh->b_size);
4046 kunmap(page); 4055 kunmap(page);
4047 mark_buffer_dirty(tmp_bh); 4056 mark_buffer_dirty(tmp_bh);
4048 jindex++; 4057 jindex++;
4049 set_buffer_journal_dirty(cn->bh); 4058 set_buffer_journal_dirty(cn->bh);
4050 clear_buffer_journaled(cn->bh); 4059 clear_buffer_journaled(cn->bh);
4051 } else { 4060 } else {
4052 /* JDirty cleared sometime during transaction. don't log this one */ 4061 /* JDirty cleared sometime during transaction. don't log this one */
4053 reiserfs_warning(p_s_sb, 4062 reiserfs_warning(p_s_sb,
4054 "journal-2048: do_journal_end: BAD, buffer in journal hash, but not JDirty!"); 4063 "journal-2048: do_journal_end: BAD, buffer in journal hash, but not JDirty!");
4055 brelse(cn->bh); 4064 brelse(cn->bh);
4056 } 4065 }
4057 next = cn->next; 4066 next = cn->next;
4058 free_cnode(p_s_sb, cn); 4067 free_cnode(p_s_sb, cn);
4059 cn = next; 4068 cn = next;
4060 cond_resched(); 4069 cond_resched();
4061 } 4070 }
4062 4071
4063 /* we are done with both the c_bh and d_bh, but 4072 /* we are done with both the c_bh and d_bh, but
4064 ** c_bh must be written after all other commit blocks, 4073 ** c_bh must be written after all other commit blocks,
4065 ** so we dirty/release c_bh in flush_commit_list, with commit_left <= 1. 4074 ** so we dirty/release c_bh in flush_commit_list, with commit_left <= 1.
4066 */ 4075 */
4067 4076
4068 journal->j_current_jl = alloc_journal_list(p_s_sb); 4077 journal->j_current_jl = alloc_journal_list(p_s_sb);
4069 4078
4070 /* now it is safe to insert this transaction on the main list */ 4079 /* now it is safe to insert this transaction on the main list */
4071 list_add_tail(&jl->j_list, &journal->j_journal_list); 4080 list_add_tail(&jl->j_list, &journal->j_journal_list);
4072 list_add_tail(&jl->j_working_list, &journal->j_working_list); 4081 list_add_tail(&jl->j_working_list, &journal->j_working_list);
4073 journal->j_num_work_lists++; 4082 journal->j_num_work_lists++;
4074 4083
4075 /* reset journal values for the next transaction */ 4084 /* reset journal values for the next transaction */
4076 old_start = journal->j_start; 4085 old_start = journal->j_start;
4077 journal->j_start = 4086 journal->j_start =
4078 (journal->j_start + journal->j_len + 4087 (journal->j_start + journal->j_len +
4079 2) % SB_ONDISK_JOURNAL_SIZE(p_s_sb); 4088 2) % SB_ONDISK_JOURNAL_SIZE(p_s_sb);
4080 atomic_set(&(journal->j_wcount), 0); 4089 atomic_set(&(journal->j_wcount), 0);
4081 journal->j_bcount = 0; 4090 journal->j_bcount = 0;
4082 journal->j_last = NULL; 4091 journal->j_last = NULL;
4083 journal->j_first = NULL; 4092 journal->j_first = NULL;
4084 journal->j_len = 0; 4093 journal->j_len = 0;
4085 journal->j_trans_start_time = 0; 4094 journal->j_trans_start_time = 0;
4086 journal->j_trans_id++; 4095 journal->j_trans_id++;
4087 journal->j_current_jl->j_trans_id = journal->j_trans_id; 4096 journal->j_current_jl->j_trans_id = journal->j_trans_id;
4088 journal->j_must_wait = 0; 4097 journal->j_must_wait = 0;
4089 journal->j_len_alloc = 0; 4098 journal->j_len_alloc = 0;
4090 journal->j_next_full_flush = 0; 4099 journal->j_next_full_flush = 0;
4091 journal->j_next_async_flush = 0; 4100 journal->j_next_async_flush = 0;
4092 init_journal_hash(p_s_sb); 4101 init_journal_hash(p_s_sb);
4093 4102
4094 // make sure reiserfs_add_jh sees the new current_jl before we 4103 // make sure reiserfs_add_jh sees the new current_jl before we
4095 // write out the tails 4104 // write out the tails
4096 smp_mb(); 4105 smp_mb();
4097 4106
4098 /* tail conversion targets have to hit the disk before we end the 4107 /* tail conversion targets have to hit the disk before we end the
4099 * transaction. Otherwise a later transaction might repack the tail 4108 * transaction. Otherwise a later transaction might repack the tail
4100 * before this transaction commits, leaving the data block unflushed and 4109 * before this transaction commits, leaving the data block unflushed and
4101 * clean, if we crash before the later transaction commits, the data block 4110 * clean, if we crash before the later transaction commits, the data block
4102 * is lost. 4111 * is lost.
4103 */ 4112 */
4104 if (!list_empty(&jl->j_tail_bh_list)) { 4113 if (!list_empty(&jl->j_tail_bh_list)) {
4105 unlock_kernel(); 4114 unlock_kernel();
4106 write_ordered_buffers(&journal->j_dirty_buffers_lock, 4115 write_ordered_buffers(&journal->j_dirty_buffers_lock,
4107 journal, jl, &jl->j_tail_bh_list); 4116 journal, jl, &jl->j_tail_bh_list);
4108 lock_kernel(); 4117 lock_kernel();
4109 } 4118 }
4110 if (!list_empty(&jl->j_tail_bh_list)) 4119 if (!list_empty(&jl->j_tail_bh_list))
4111 BUG(); 4120 BUG();
4112 up(&jl->j_commit_lock); 4121 up(&jl->j_commit_lock);
4113 4122
4114 /* honor the flush wishes from the caller; simple commits can 4123 /* honor the flush wishes from the caller; simple commits can
4115 ** be done outside the journal lock, and they are done below 4124 ** be done outside the journal lock, and they are done below
4116 ** 4125 **
4117 ** if we don't flush the commit list right now, we put it into 4126 ** if we don't flush the commit list right now, we put it into
4118 ** the work queue so the people waiting on the async progress work 4127 ** the work queue so the people waiting on the async progress work
4119 ** queue don't wait for this proc to flush journal lists and such. 4128 ** queue don't wait for this proc to flush journal lists and such.
4120 */ 4129 */
4121 if (flush) { 4130 if (flush) {
4122 flush_commit_list(p_s_sb, jl, 1); 4131 flush_commit_list(p_s_sb, jl, 1);
4123 flush_journal_list(p_s_sb, jl, 1); 4132 flush_journal_list(p_s_sb, jl, 1);
4124 } else if (!(jl->j_state & LIST_COMMIT_PENDING)) 4133 } else if (!(jl->j_state & LIST_COMMIT_PENDING))
4125 queue_delayed_work(commit_wq, &journal->j_work, HZ / 10); 4134 queue_delayed_work(commit_wq, &journal->j_work, HZ / 10);
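
This is the batching the patch relies on: synchronous callers flush immediately, while everyone else queues a single delayed commit (HZ / 10, about a tenth of a second). A standalone sketch of the decision, assuming LIST_COMMIT_PENDING guards against queueing the same list twice (hypothetical names):

#include <stdio.h>

enum { LIST_COMMIT_PENDING = 0x1 };

struct jlist { int state; };

static void flush_now(struct jlist *jl)
{
        (void)jl;
        printf("synchronous commit + flush\n");
}

static void queue_commit(struct jlist *jl)
{
        jl->state |= LIST_COMMIT_PENDING;
        printf("queued for delayed batch commit\n");
}

static void end_commit(struct jlist *jl, int flush)
{
        if (flush)
                flush_now(jl);          /* caller needs it on disk now */
        else if (!(jl->state & LIST_COMMIT_PENDING))
                queue_commit(jl);       /* defer; queue at most once */
}

int main(void)
{
        struct jlist jl = { 0 };
        end_commit(&jl, 0);     /* queued */
        end_commit(&jl, 0);     /* already pending: not queued again */
        return 0;
}
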
4126 4135
4127 /* if the next transaction has any chance of wrapping, flush 4136 /* if the next transaction has any chance of wrapping, flush
4128 ** transactions that might get overwritten. If any journal lists are very 4137 ** transactions that might get overwritten. If any journal lists are very
4129 ** old, flush them as well. 4138 ** old, flush them as well.
4130 */ 4139 */
4131 first_jl: 4140 first_jl:
4132 list_for_each_safe(entry, safe, &journal->j_journal_list) { 4141 list_for_each_safe(entry, safe, &journal->j_journal_list) {
4133 temp_jl = JOURNAL_LIST_ENTRY(entry); 4142 temp_jl = JOURNAL_LIST_ENTRY(entry);
4134 if (journal->j_start <= temp_jl->j_start) { 4143 if (journal->j_start <= temp_jl->j_start) {
4135 if ((journal->j_start + journal->j_trans_max + 1) >= 4144 if ((journal->j_start + journal->j_trans_max + 1) >=
4136 temp_jl->j_start) { 4145 temp_jl->j_start) {
4137 flush_used_journal_lists(p_s_sb, temp_jl); 4146 flush_used_journal_lists(p_s_sb, temp_jl);
4138 goto first_jl; 4147 goto first_jl;
4139 } else if ((journal->j_start + 4148 } else if ((journal->j_start +
4140 journal->j_trans_max + 1) < 4149 journal->j_trans_max + 1) <
4141 SB_ONDISK_JOURNAL_SIZE(p_s_sb)) { 4150 SB_ONDISK_JOURNAL_SIZE(p_s_sb)) {
4142 /* if we don't cross into the next transaction and we don't 4151 /* if we don't cross into the next transaction and we don't
4143 * wrap, there is no way we can overlap any later transactions; 4152 * wrap, there is no way we can overlap any later transactions;
4144 * break now 4153 * break now
4145 */ 4154 */
4146 break; 4155 break;
4147 } 4156 }
4148 } else if ((journal->j_start + 4157 } else if ((journal->j_start +
4149 journal->j_trans_max + 1) > 4158 journal->j_trans_max + 1) >
4150 SB_ONDISK_JOURNAL_SIZE(p_s_sb)) { 4159 SB_ONDISK_JOURNAL_SIZE(p_s_sb)) {
4151 if (((journal->j_start + journal->j_trans_max + 1) % 4160 if (((journal->j_start + journal->j_trans_max + 1) %
4152 SB_ONDISK_JOURNAL_SIZE(p_s_sb)) >= 4161 SB_ONDISK_JOURNAL_SIZE(p_s_sb)) >=
4153 temp_jl->j_start) { 4162 temp_jl->j_start) {
4154 flush_used_journal_lists(p_s_sb, temp_jl); 4163 flush_used_journal_lists(p_s_sb, temp_jl);
4155 goto first_jl; 4164 goto first_jl;
4156 } else { 4165 } else {
4157 /* we don't overlap anything from our start to the end of the 4166 /* we don't overlap anything from our start to the end of the
4158 * log, and our wrapped portion doesn't overlap anything at 4167 * log, and our wrapped portion doesn't overlap anything at
4159 * the start of the log. We can break. 4168 * the start of the log. We can break.
4160 */ 4169 */
4161 break; 4170 break;
4162 } 4171 }
4163 } 4172 }
4164 } 4173 }
4165 flush_old_journal_lists(p_s_sb); 4174 flush_old_journal_lists(p_s_sb);
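
The three-way comparison in the loop above reduces to a single overlap predicate on the circular log. A standalone sketch with a worked example (hypothetical names, same arithmetic as the loop):

#include <assert.h>

/* nonzero if the next transaction, which may wrap, could overwrite a
 * journal list starting at list_start */
static int may_overlap(unsigned j_start, unsigned trans_max,
                       unsigned journal_size, unsigned list_start)
{
        unsigned end = j_start + trans_max + 1;

        if (j_start <= list_start)
                return end >= list_start;   /* runs straight into it */
        if (end > journal_size)             /* wrapped portion hits it? */
                return (end % journal_size) >= list_start;
        return 0;                           /* behind us, and no wrap */
}

int main(void)
{
        /* a transaction starting at block 8000 with max 600 in an
         * 8192-block log wraps and can hit a list at block 100 ... */
        assert(may_overlap(8000, 600, 8192, 100));
        /* ... but with max 100 it never wraps, so it cannot */
        assert(!may_overlap(8000, 100, 8192, 100));
        return 0;
}
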
4166 4175
4167 journal->j_current_jl->j_list_bitmap = 4176 journal->j_current_jl->j_list_bitmap =
4168 get_list_bitmap(p_s_sb, journal->j_current_jl); 4177 get_list_bitmap(p_s_sb, journal->j_current_jl);
4169 4178
4170 if (!(journal->j_current_jl->j_list_bitmap)) { 4179 if (!(journal->j_current_jl->j_list_bitmap)) {
4171 reiserfs_panic(p_s_sb, 4180 reiserfs_panic(p_s_sb,
4172 "journal-1996: do_journal_end, could not get a list bitmap\n"); 4181 "journal-1996: do_journal_end, could not get a list bitmap\n");
4173 } 4182 }
4174 4183
4175 atomic_set(&(journal->j_jlock), 0); 4184 atomic_set(&(journal->j_jlock), 0);
4176 unlock_journal(p_s_sb); 4185 unlock_journal(p_s_sb);
4177 /* wake up anybody waiting to join. */ 4186 /* wake up anybody waiting to join. */
4178 clear_bit(J_WRITERS_QUEUED, &journal->j_state); 4187 clear_bit(J_WRITERS_QUEUED, &journal->j_state);
4179 wake_up(&(journal->j_join_wait)); 4188 wake_up(&(journal->j_join_wait));
4180 4189
4181 if (!flush && wait_on_commit && 4190 if (!flush && wait_on_commit &&
4182 journal_list_still_alive(p_s_sb, commit_trans_id)) { 4191 journal_list_still_alive(p_s_sb, commit_trans_id)) {
4183 flush_commit_list(p_s_sb, jl, 1); 4192 flush_commit_list(p_s_sb, jl, 1);
4184 } 4193 }
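
commit_trans_id was sampled before the journal lock was dropped, so the list must be revalidated before flushing: another writer may already have committed and recycled it. A sketch of that save-id-then-revalidate pattern, assuming a recycled list is reused with a newer trans_id (an assumption; journal_list_still_alive's actual check is not shown here):

struct jlist { unsigned trans_id; };

static int list_still_alive(const struct jlist *jl, unsigned saved_id)
{
        /* assumption: a recycled list gets a newer trans_id, so a
         * mismatch means the saved transaction already committed and
         * there is nothing left to flush */
        return jl->trans_id == saved_id;
}

int main(void)
{
        struct jlist jl = { 7 };
        return !list_still_alive(&jl, 7);   /* exits 0: still alive */
}
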
4185 out: 4194 out:
4186 reiserfs_check_lock_depth(p_s_sb, "journal end2"); 4195 reiserfs_check_lock_depth(p_s_sb, "journal end2");
4187 4196
4188 memset(th, 0, sizeof(*th)); 4197 memset(th, 0, sizeof(*th));
4189 /* Re-set th->t_super, so we can properly keep track of how many 4198 /* Re-set th->t_super, so we can properly keep track of how many
4190 * persistent transactions there are. We need to do this so if this 4199 * persistent transactions there are. We need to do this so if this
4191 * call is part of a failed restart_transaction, we can free it later */ 4200 * call is part of a failed restart_transaction, we can free it later */
4192 th->t_super = p_s_sb; 4201 th->t_super = p_s_sb;
4193 4202
4194 return journal->j_errno; 4203 return journal->j_errno;
4195 } 4204 }
4196 4205
4197 static void __reiserfs_journal_abort_hard(struct super_block *sb) 4206 static void __reiserfs_journal_abort_hard(struct super_block *sb)
4198 { 4207 {
4199 struct reiserfs_journal *journal = SB_JOURNAL(sb); 4208 struct reiserfs_journal *journal = SB_JOURNAL(sb);
4200 if (test_bit(J_ABORTED, &journal->j_state)) 4209 if (test_bit(J_ABORTED, &journal->j_state))
4201 return; 4210 return;
4202 4211
4203 printk(KERN_CRIT "REISERFS: Aborting journal for filesystem on %s\n", 4212 printk(KERN_CRIT "REISERFS: Aborting journal for filesystem on %s\n",
4204 reiserfs_bdevname(sb)); 4213 reiserfs_bdevname(sb));
4205 4214
4206 sb->s_flags |= MS_RDONLY; 4215 sb->s_flags |= MS_RDONLY;
4207 set_bit(J_ABORTED, &journal->j_state); 4216 set_bit(J_ABORTED, &journal->j_state);
4208 4217
4209 #ifdef CONFIG_REISERFS_CHECK 4218 #ifdef CONFIG_REISERFS_CHECK
4210 dump_stack(); 4219 dump_stack();
4211 #endif 4220 #endif
4212 } 4221 }
4213 4222
4214 static void __reiserfs_journal_abort_soft(struct super_block *sb, int errno) 4223 static void __reiserfs_journal_abort_soft(struct super_block *sb, int errno)
4215 { 4224 {
4216 struct reiserfs_journal *journal = SB_JOURNAL(sb); 4225 struct reiserfs_journal *journal = SB_JOURNAL(sb);
4217 if (test_bit(J_ABORTED, &journal->j_state)) 4226 if (test_bit(J_ABORTED, &journal->j_state))
4218 return; 4227 return;
4219 4228
4220 if (!journal->j_errno) 4229 if (!journal->j_errno)
4221 journal->j_errno = errno; 4230 journal->j_errno = errno;
4222 4231
4223 __reiserfs_journal_abort_hard(sb); 4232 __reiserfs_journal_abort_hard(sb);
4224 } 4233 }
4225 4234
4226 void reiserfs_journal_abort(struct super_block *sb, int errno) 4235 void reiserfs_journal_abort(struct super_block *sb, int errno)
4227 { 4236 {
4228 return __reiserfs_journal_abort_soft(sb, errno); 4237 return __reiserfs_journal_abort_soft(sb, errno);
4229 } 4238 }
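
The abort path is idempotent: the soft variant records only the first errno and delegates to the hard variant, which marks J_ABORTED and forces the filesystem read-only exactly once. A standalone sketch of those semantics (hypothetical names, mirroring the two functions above):

#include <stdio.h>

struct journal { int errno_saved; int aborted; };

static void abort_hard(struct journal *j)
{
        if (j->aborted)
                return;                 /* only the first abort acts */
        j->aborted = 1;                 /* like setting J_ABORTED and
                                         * marking the fs read-only */
        fprintf(stderr, "aborting journal\n");
}

static void abort_soft(struct journal *j, int errno_val)
{
        if (j->aborted)
                return;
        if (!j->errno_saved)
                j->errno_saved = errno_val;  /* keep the first error */
        abort_hard(j);
}

int main(void)
{
        struct journal j = { 0, 0 };
        abort_soft(&j, 5);      /* EIO */
        abort_soft(&j, 28);     /* ENOSPC: ignored, first errno wins */
        printf("errno=%d aborted=%d\n", j.errno_saved, j.aborted);
        return 0;
}
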
4230 4239