Commit cbc3d65ebcb0c494183d45cf202a53352cbf3871

Authored by Dave Kleikamp
1 parent de8fd087b2

JFS: Improve sync barrier processing

Under heavy load, hot metadata pages are often locked by non-committed
transactions, making them difficult to flush to disk.  This prevents
the sync point from advancing past a transaction that had modified the
page.

There is a point during sync barrier processing where all
outstanding transactions have been committed to disk, but no new
transactions have been allowed to proceed.  This is the best time
to write the metadata.

Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
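Illustrative sketch (not part of the patch): a hypothetical caller at the
sync barrier point.  Only the jfs_syncpt() signature and the hard_sync
behavior are taken from this change; the surrounding barrier logic is an
assumption standing in for the real caller in the transaction manager.

	/* Hypothetical caller-side sketch.  At the sync barrier, every
	 * outstanding transaction has committed and new ones are held
	 * off, so no metadata page is pinned by an uncommitted
	 * transaction and all of it can be written.
	 */
	if (test_bit(log_SYNCBARRIER, &log->flag)) {
		/* hard_sync = 1: queue ALL dirty metadata for writeback
		 * via filemap_fdatawrite() rather than the opportunistic
		 * filemap_flush() used on the normal path
		 */
		jfs_syncpt(log, 1);
	}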

Showing 4 changed files with 26 additions and 24 deletions

1 /* 1 /*
2 * Copyright (C) International Business Machines Corp., 2000-2004 2 * Copyright (C) International Business Machines Corp., 2000-2004
3 * Portions Copyright (C) Christoph Hellwig, 2001-2002 3 * Portions Copyright (C) Christoph Hellwig, 2001-2002
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or 7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version. 8 * (at your option) any later version.
9 * 9 *
10 * This program is distributed in the hope that it will be useful, 10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
13 * the GNU General Public License for more details. 13 * the GNU General Public License for more details.
14 * 14 *
15 * You should have received a copy of the GNU General Public License 15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software 16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */ 18 */
19 19
20 /* 20 /*
21 * jfs_logmgr.c: log manager 21 * jfs_logmgr.c: log manager
22 * 22 *
23 * for related information, see transaction manager (jfs_txnmgr.c), and 23 * for related information, see transaction manager (jfs_txnmgr.c), and
24 * recovery manager (jfs_logredo.c). 24 * recovery manager (jfs_logredo.c).
25 * 25 *
26 * note: for detail, RTFS. 26 * note: for detail, RTFS.
27 * 27 *
28 * log buffer manager: 28 * log buffer manager:
29 * special purpose buffer manager supporting log i/o requirements. 29 * special purpose buffer manager supporting log i/o requirements.
30 * per log serial pageout of logpage 30 * per log serial pageout of logpage
31 * queuing i/o requests and redrive i/o at iodone 31 * queuing i/o requests and redrive i/o at iodone
32 * maintain current logpage buffer 32 * maintain current logpage buffer
33 * no caching since append only 33 * no caching since append only
34 * appropriate jfs buffer cache buffers as needed 34 * appropriate jfs buffer cache buffers as needed
35 * 35 *
36 * group commit: 36 * group commit:
37 * transactions which wrote COMMIT records in the same in-memory 37 * transactions which wrote COMMIT records in the same in-memory
38 * log page during the pageout of previous/current log page(s) are 38 * log page during the pageout of previous/current log page(s) are
39 * committed together by the pageout of the page. 39 * committed together by the pageout of the page.
40 * 40 *
41 * TBD lazy commit: 41 * TBD lazy commit:
42 * transactions are committed asynchronously when the log page 42 * transactions are committed asynchronously when the log page
43 * containing it COMMIT is paged out when it becomes full; 43 * containing it COMMIT is paged out when it becomes full;
44 * 44 *
45 * serialization: 45 * serialization:
46 * . a per log lock serialize log write. 46 * . a per log lock serialize log write.
47 * . a per log lock serialize group commit. 47 * . a per log lock serialize group commit.
48 * . a per log lock serialize log open/close; 48 * . a per log lock serialize log open/close;
49 * 49 *
50 * TBD log integrity: 50 * TBD log integrity:
51 * careful-write (ping-pong) of last logpage to recover from crash 51 * careful-write (ping-pong) of last logpage to recover from crash
52 * in overwrite. 52 * in overwrite.
53 * detection of split (out-of-order) write of physical sectors 53 * detection of split (out-of-order) write of physical sectors
54 * of last logpage via timestamp at end of each sector 54 * of last logpage via timestamp at end of each sector
55 * with its mirror data array at trailer). 55 * with its mirror data array at trailer).
56 * 56 *
57 * alternatives: 57 * alternatives:
58 * lsn - 64-bit monotonically increasing integer vs 58 * lsn - 64-bit monotonically increasing integer vs
59 * 32-bit lspn and page eor. 59 * 32-bit lspn and page eor.
60 */ 60 */
61 61
62 #include <linux/fs.h> 62 #include <linux/fs.h>
63 #include <linux/blkdev.h> 63 #include <linux/blkdev.h>
64 #include <linux/interrupt.h> 64 #include <linux/interrupt.h>
65 #include <linux/smp_lock.h> 65 #include <linux/smp_lock.h>
66 #include <linux/completion.h> 66 #include <linux/completion.h>
67 #include <linux/buffer_head.h> /* for sync_blockdev() */ 67 #include <linux/buffer_head.h> /* for sync_blockdev() */
68 #include <linux/bio.h> 68 #include <linux/bio.h>
69 #include <linux/suspend.h> 69 #include <linux/suspend.h>
70 #include <linux/delay.h> 70 #include <linux/delay.h>
71 #include "jfs_incore.h" 71 #include "jfs_incore.h"
72 #include "jfs_filsys.h" 72 #include "jfs_filsys.h"
73 #include "jfs_metapage.h" 73 #include "jfs_metapage.h"
74 #include "jfs_superblock.h" 74 #include "jfs_superblock.h"
75 #include "jfs_txnmgr.h" 75 #include "jfs_txnmgr.h"
76 #include "jfs_debug.h" 76 #include "jfs_debug.h"
77 77
78 78
79 /* 79 /*
80 * lbuf's ready to be redriven. Protected by log_redrive_lock (jfsIO thread) 80 * lbuf's ready to be redriven. Protected by log_redrive_lock (jfsIO thread)
81 */ 81 */
82 static struct lbuf *log_redrive_list; 82 static struct lbuf *log_redrive_list;
83 static DEFINE_SPINLOCK(log_redrive_lock); 83 static DEFINE_SPINLOCK(log_redrive_lock);
84 DECLARE_WAIT_QUEUE_HEAD(jfs_IO_thread_wait); 84 DECLARE_WAIT_QUEUE_HEAD(jfs_IO_thread_wait);
85 85
86 86
87 /* 87 /*
88 * log read/write serialization (per log) 88 * log read/write serialization (per log)
89 */ 89 */
90 #define LOG_LOCK_INIT(log) init_MUTEX(&(log)->loglock) 90 #define LOG_LOCK_INIT(log) init_MUTEX(&(log)->loglock)
91 #define LOG_LOCK(log) down(&((log)->loglock)) 91 #define LOG_LOCK(log) down(&((log)->loglock))
92 #define LOG_UNLOCK(log) up(&((log)->loglock)) 92 #define LOG_UNLOCK(log) up(&((log)->loglock))
93 93
94 94
95 /* 95 /*
96 * log group commit serialization (per log) 96 * log group commit serialization (per log)
97 */ 97 */
98 98
99 #define LOGGC_LOCK_INIT(log) spin_lock_init(&(log)->gclock) 99 #define LOGGC_LOCK_INIT(log) spin_lock_init(&(log)->gclock)
100 #define LOGGC_LOCK(log) spin_lock_irq(&(log)->gclock) 100 #define LOGGC_LOCK(log) spin_lock_irq(&(log)->gclock)
101 #define LOGGC_UNLOCK(log) spin_unlock_irq(&(log)->gclock) 101 #define LOGGC_UNLOCK(log) spin_unlock_irq(&(log)->gclock)
102 #define LOGGC_WAKEUP(tblk) wake_up_all(&(tblk)->gcwait) 102 #define LOGGC_WAKEUP(tblk) wake_up_all(&(tblk)->gcwait)
103 103
104 /* 104 /*
105 * log sync serialization (per log) 105 * log sync serialization (per log)
106 */ 106 */
107 #define LOGSYNC_DELTA(logsize) min((logsize)/8, 128*LOGPSIZE) 107 #define LOGSYNC_DELTA(logsize) min((logsize)/8, 128*LOGPSIZE)
108 #define LOGSYNC_BARRIER(logsize) ((logsize)/4) 108 #define LOGSYNC_BARRIER(logsize) ((logsize)/4)
109 /* 109 /*
110 #define LOGSYNC_DELTA(logsize) min((logsize)/4, 256*LOGPSIZE) 110 #define LOGSYNC_DELTA(logsize) min((logsize)/4, 256*LOGPSIZE)
111 #define LOGSYNC_BARRIER(logsize) ((logsize)/2) 111 #define LOGSYNC_BARRIER(logsize) ((logsize)/2)
112 */ 112 */
113 113
114 114
115 /* 115 /*
116 * log buffer cache synchronization 116 * log buffer cache synchronization
117 */ 117 */
118 static DEFINE_SPINLOCK(jfsLCacheLock); 118 static DEFINE_SPINLOCK(jfsLCacheLock);
119 119
120 #define LCACHE_LOCK(flags) spin_lock_irqsave(&jfsLCacheLock, flags) 120 #define LCACHE_LOCK(flags) spin_lock_irqsave(&jfsLCacheLock, flags)
121 #define LCACHE_UNLOCK(flags) spin_unlock_irqrestore(&jfsLCacheLock, flags) 121 #define LCACHE_UNLOCK(flags) spin_unlock_irqrestore(&jfsLCacheLock, flags)
122 122
123 /* 123 /*
124 * See __SLEEP_COND in jfs_locks.h 124 * See __SLEEP_COND in jfs_locks.h
125 */ 125 */
126 #define LCACHE_SLEEP_COND(wq, cond, flags) \ 126 #define LCACHE_SLEEP_COND(wq, cond, flags) \
127 do { \ 127 do { \
128 if (cond) \ 128 if (cond) \
129 break; \ 129 break; \
130 __SLEEP_COND(wq, cond, LCACHE_LOCK(flags), LCACHE_UNLOCK(flags)); \ 130 __SLEEP_COND(wq, cond, LCACHE_LOCK(flags), LCACHE_UNLOCK(flags)); \
131 } while (0) 131 } while (0)
132 132
133 #define LCACHE_WAKEUP(event) wake_up(event) 133 #define LCACHE_WAKEUP(event) wake_up(event)
134 134
135 135
136 /* 136 /*
137 * lbuf buffer cache (lCache) control 137 * lbuf buffer cache (lCache) control
138 */ 138 */
139 /* log buffer manager pageout control (cumulative, inclusive) */ 139 /* log buffer manager pageout control (cumulative, inclusive) */
140 #define lbmREAD 0x0001 140 #define lbmREAD 0x0001
141 #define lbmWRITE 0x0002 /* enqueue at tail of write queue; 141 #define lbmWRITE 0x0002 /* enqueue at tail of write queue;
142 * init pageout if at head of queue; 142 * init pageout if at head of queue;
143 */ 143 */
144 #define lbmRELEASE 0x0004 /* remove from write queue 144 #define lbmRELEASE 0x0004 /* remove from write queue
145 * at completion of pageout; 145 * at completion of pageout;
146 * do not free/recycle it yet: 146 * do not free/recycle it yet:
147 * caller will free it; 147 * caller will free it;
148 */ 148 */
149 #define lbmSYNC 0x0008 /* do not return to freelist 149 #define lbmSYNC 0x0008 /* do not return to freelist
150 * when removed from write queue; 150 * when removed from write queue;
151 */ 151 */
152 #define lbmFREE 0x0010 /* return to freelist 152 #define lbmFREE 0x0010 /* return to freelist
153 * at completion of pageout; 153 * at completion of pageout;
154 * the buffer may be recycled; 154 * the buffer may be recycled;
155 */ 155 */
156 #define lbmDONE 0x0020 156 #define lbmDONE 0x0020
157 #define lbmERROR 0x0040 157 #define lbmERROR 0x0040
158 #define lbmGC 0x0080 /* lbmIODone to perform post-GC processing 158 #define lbmGC 0x0080 /* lbmIODone to perform post-GC processing
159 * of log page 159 * of log page
160 */ 160 */
161 #define lbmDIRECT 0x0100 161 #define lbmDIRECT 0x0100
162 162
163 /* 163 /*
164 * Global list of active external journals 164 * Global list of active external journals
165 */ 165 */
166 static LIST_HEAD(jfs_external_logs); 166 static LIST_HEAD(jfs_external_logs);
167 static struct jfs_log *dummy_log = NULL; 167 static struct jfs_log *dummy_log = NULL;
168 static DECLARE_MUTEX(jfs_log_sem); 168 static DECLARE_MUTEX(jfs_log_sem);
169 169
170 /* 170 /*
171 * forward references 171 * forward references
172 */ 172 */
173 static int lmWriteRecord(struct jfs_log * log, struct tblock * tblk, 173 static int lmWriteRecord(struct jfs_log * log, struct tblock * tblk,
174 struct lrd * lrd, struct tlock * tlck); 174 struct lrd * lrd, struct tlock * tlck);
175 175
176 static int lmNextPage(struct jfs_log * log); 176 static int lmNextPage(struct jfs_log * log);
177 static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi, 177 static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
178 int activate); 178 int activate);
179 179
180 static int open_inline_log(struct super_block *sb); 180 static int open_inline_log(struct super_block *sb);
181 static int open_dummy_log(struct super_block *sb); 181 static int open_dummy_log(struct super_block *sb);
182 static int lbmLogInit(struct jfs_log * log); 182 static int lbmLogInit(struct jfs_log * log);
183 static void lbmLogShutdown(struct jfs_log * log); 183 static void lbmLogShutdown(struct jfs_log * log);
184 static struct lbuf *lbmAllocate(struct jfs_log * log, int); 184 static struct lbuf *lbmAllocate(struct jfs_log * log, int);
185 static void lbmFree(struct lbuf * bp); 185 static void lbmFree(struct lbuf * bp);
186 static void lbmfree(struct lbuf * bp); 186 static void lbmfree(struct lbuf * bp);
187 static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp); 187 static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp);
188 static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag, int cant_block); 188 static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag, int cant_block);
189 static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag); 189 static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag);
190 static int lbmIOWait(struct lbuf * bp, int flag); 190 static int lbmIOWait(struct lbuf * bp, int flag);
191 static bio_end_io_t lbmIODone; 191 static bio_end_io_t lbmIODone;
192 static void lbmStartIO(struct lbuf * bp); 192 static void lbmStartIO(struct lbuf * bp);
193 static void lmGCwrite(struct jfs_log * log, int cant_block); 193 static void lmGCwrite(struct jfs_log * log, int cant_block);
194 static int lmLogSync(struct jfs_log * log, int nosyncwait); 194 static int lmLogSync(struct jfs_log * log, int hard_sync);
195 195
196 196
197 197
198 /* 198 /*
199 * statistics 199 * statistics
200 */ 200 */
201 #ifdef CONFIG_JFS_STATISTICS 201 #ifdef CONFIG_JFS_STATISTICS
202 static struct lmStat { 202 static struct lmStat {
203 uint commit; /* # of commit */ 203 uint commit; /* # of commit */
204 uint pagedone; /* # of page written */ 204 uint pagedone; /* # of page written */
205 uint submitted; /* # of pages submitted */ 205 uint submitted; /* # of pages submitted */
206 uint full_page; /* # of full pages submitted */ 206 uint full_page; /* # of full pages submitted */
207 uint partial_page; /* # of partial pages submitted */ 207 uint partial_page; /* # of partial pages submitted */
208 } lmStat; 208 } lmStat;
209 #endif 209 #endif
210 210
211 211
212 /* 212 /*
213 * NAME: lmLog() 213 * NAME: lmLog()
214 * 214 *
215 * FUNCTION: write a log record; 215 * FUNCTION: write a log record;
216 * 216 *
217 * PARAMETER: 217 * PARAMETER:
218 * 218 *
219 * RETURN: lsn - offset to the next log record to write (end-of-log); 219 * RETURN: lsn - offset to the next log record to write (end-of-log);
220 * -1 - error; 220 * -1 - error;
221 * 221 *
222 * note: todo: log error handler 222 * note: todo: log error handler
223 */ 223 */
224 int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, 224 int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
225 struct tlock * tlck) 225 struct tlock * tlck)
226 { 226 {
227 int lsn; 227 int lsn;
228 int diffp, difft; 228 int diffp, difft;
229 struct metapage *mp = NULL; 229 struct metapage *mp = NULL;
230 unsigned long flags; 230 unsigned long flags;
231 231
232 jfs_info("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p", 232 jfs_info("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p",
233 log, tblk, lrd, tlck); 233 log, tblk, lrd, tlck);
234 234
235 LOG_LOCK(log); 235 LOG_LOCK(log);
236 236
237 /* log by (out-of-transaction) JFS ? */ 237 /* log by (out-of-transaction) JFS ? */
238 if (tblk == NULL) 238 if (tblk == NULL)
239 goto writeRecord; 239 goto writeRecord;
240 240
241 /* log from page ? */ 241 /* log from page ? */
242 if (tlck == NULL || 242 if (tlck == NULL ||
243 tlck->type & tlckBTROOT || (mp = tlck->mp) == NULL) 243 tlck->type & tlckBTROOT || (mp = tlck->mp) == NULL)
244 goto writeRecord; 244 goto writeRecord;
245 245
246 /* 246 /*
247 * initialize/update page/transaction recovery lsn 247 * initialize/update page/transaction recovery lsn
248 */ 248 */
249 lsn = log->lsn; 249 lsn = log->lsn;
250 250
251 LOGSYNC_LOCK(log, flags); 251 LOGSYNC_LOCK(log, flags);
252 252
253 /* 253 /*
254 * initialize page lsn if first log write of the page 254 * initialize page lsn if first log write of the page
255 */ 255 */
256 if (mp->lsn == 0) { 256 if (mp->lsn == 0) {
257 mp->log = log; 257 mp->log = log;
258 mp->lsn = lsn; 258 mp->lsn = lsn;
259 log->count++; 259 log->count++;
260 260
261 /* insert page at tail of logsynclist */ 261 /* insert page at tail of logsynclist */
262 list_add_tail(&mp->synclist, &log->synclist); 262 list_add_tail(&mp->synclist, &log->synclist);
263 } 263 }
264 264
265 /* 265 /*
266 * initialize/update lsn of tblock of the page 266 * initialize/update lsn of tblock of the page
267 * 267 *
268 * transaction inherits oldest lsn of pages associated 268 * transaction inherits oldest lsn of pages associated
269 * with allocation/deallocation of resources (their 269 * with allocation/deallocation of resources (their
270 * log records are used to reconstruct allocation map 270 * log records are used to reconstruct allocation map
271 * at recovery time: inode for inode allocation map, 271 * at recovery time: inode for inode allocation map,
272 * B+-tree index of extent descriptors for block 272 * B+-tree index of extent descriptors for block
273 * allocation map); 273 * allocation map);
274 * allocation map pages inherit transaction lsn at 274 * allocation map pages inherit transaction lsn at
275 * commit time to allow forwarding log syncpt past log 275 * commit time to allow forwarding log syncpt past log
276 * records associated with allocation/deallocation of 276 * records associated with allocation/deallocation of
277 * resources only after persistent map of these map pages 277 * resources only after persistent map of these map pages
278 * have been updated and propagated to home. 278 * have been updated and propagated to home.
279 */ 279 */
280 /* 280 /*
281 * initialize transaction lsn: 281 * initialize transaction lsn:
282 */ 282 */
283 if (tblk->lsn == 0) { 283 if (tblk->lsn == 0) {
284 /* inherit lsn of its first page logged */ 284 /* inherit lsn of its first page logged */
285 tblk->lsn = mp->lsn; 285 tblk->lsn = mp->lsn;
286 log->count++; 286 log->count++;
287 287
288 /* insert tblock after the page on logsynclist */ 288 /* insert tblock after the page on logsynclist */
289 list_add(&tblk->synclist, &mp->synclist); 289 list_add(&tblk->synclist, &mp->synclist);
290 } 290 }
291 /* 291 /*
292 * update transaction lsn: 292 * update transaction lsn:
293 */ 293 */
294 else { 294 else {
295 /* inherit oldest/smallest lsn of page */ 295 /* inherit oldest/smallest lsn of page */
296 logdiff(diffp, mp->lsn, log); 296 logdiff(diffp, mp->lsn, log);
297 logdiff(difft, tblk->lsn, log); 297 logdiff(difft, tblk->lsn, log);
298 if (diffp < difft) { 298 if (diffp < difft) {
299 /* update tblock lsn with page lsn */ 299 /* update tblock lsn with page lsn */
300 tblk->lsn = mp->lsn; 300 tblk->lsn = mp->lsn;
301 301
302 /* move tblock after page on logsynclist */ 302 /* move tblock after page on logsynclist */
303 list_move(&tblk->synclist, &mp->synclist); 303 list_move(&tblk->synclist, &mp->synclist);
304 } 304 }
305 } 305 }
306 306
307 LOGSYNC_UNLOCK(log, flags); 307 LOGSYNC_UNLOCK(log, flags);
308 308
309 /* 309 /*
310 * write the log record 310 * write the log record
311 */ 311 */
312 writeRecord: 312 writeRecord:
313 lsn = lmWriteRecord(log, tblk, lrd, tlck); 313 lsn = lmWriteRecord(log, tblk, lrd, tlck);
314 314
315 /* 315 /*
316 * forward log syncpt if log reached next syncpt trigger 316 * forward log syncpt if log reached next syncpt trigger
317 */ 317 */
318 logdiff(diffp, lsn, log); 318 logdiff(diffp, lsn, log);
319 if (diffp >= log->nextsync) 319 if (diffp >= log->nextsync)
320 lsn = lmLogSync(log, 0); 320 lsn = lmLogSync(log, 0);
321 321
322 /* update end-of-log lsn */ 322 /* update end-of-log lsn */
323 log->lsn = lsn; 323 log->lsn = lsn;
324 324
325 LOG_UNLOCK(log); 325 LOG_UNLOCK(log);
326 326
327 /* return end-of-log address */ 327 /* return end-of-log address */
328 return lsn; 328 return lsn;
329 } 329 }
330 330
331 /* 331 /*
332 * NAME: lmWriteRecord() 332 * NAME: lmWriteRecord()
333 * 333 *
334 * FUNCTION: move the log record to current log page 334 * FUNCTION: move the log record to current log page
335 * 335 *
336 * PARAMETER: cd - commit descriptor 336 * PARAMETER: cd - commit descriptor
337 * 337 *
338 * RETURN: end-of-log address 338 * RETURN: end-of-log address
339 * 339 *
340 * serialization: LOG_LOCK() held on entry/exit 340 * serialization: LOG_LOCK() held on entry/exit
341 */ 341 */
342 static int 342 static int
343 lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, 343 lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
344 struct tlock * tlck) 344 struct tlock * tlck)
345 { 345 {
346 int lsn = 0; /* end-of-log address */ 346 int lsn = 0; /* end-of-log address */
347 struct lbuf *bp; /* dst log page buffer */ 347 struct lbuf *bp; /* dst log page buffer */
348 struct logpage *lp; /* dst log page */ 348 struct logpage *lp; /* dst log page */
349 caddr_t dst; /* destination address in log page */ 349 caddr_t dst; /* destination address in log page */
350 int dstoffset; /* end-of-log offset in log page */ 350 int dstoffset; /* end-of-log offset in log page */
351 int freespace; /* free space in log page */ 351 int freespace; /* free space in log page */
352 caddr_t p; /* src meta-data page */ 352 caddr_t p; /* src meta-data page */
353 caddr_t src; 353 caddr_t src;
354 int srclen; 354 int srclen;
355 int nbytes; /* number of bytes to move */ 355 int nbytes; /* number of bytes to move */
356 int i; 356 int i;
357 int len; 357 int len;
358 struct linelock *linelock; 358 struct linelock *linelock;
359 struct lv *lv; 359 struct lv *lv;
360 struct lvd *lvd; 360 struct lvd *lvd;
361 int l2linesize; 361 int l2linesize;
362 362
363 len = 0; 363 len = 0;
364 364
365 /* retrieve destination log page to write */ 365 /* retrieve destination log page to write */
366 bp = (struct lbuf *) log->bp; 366 bp = (struct lbuf *) log->bp;
367 lp = (struct logpage *) bp->l_ldata; 367 lp = (struct logpage *) bp->l_ldata;
368 dstoffset = log->eor; 368 dstoffset = log->eor;
369 369
370 /* any log data to write ? */ 370 /* any log data to write ? */
371 if (tlck == NULL) 371 if (tlck == NULL)
372 goto moveLrd; 372 goto moveLrd;
373 373
374 /* 374 /*
375 * move log record data 375 * move log record data
376 */ 376 */
377 /* retrieve source meta-data page to log */ 377 /* retrieve source meta-data page to log */
378 if (tlck->flag & tlckPAGELOCK) { 378 if (tlck->flag & tlckPAGELOCK) {
379 p = (caddr_t) (tlck->mp->data); 379 p = (caddr_t) (tlck->mp->data);
380 linelock = (struct linelock *) & tlck->lock; 380 linelock = (struct linelock *) & tlck->lock;
381 } 381 }
382 /* retrieve source in-memory inode to log */ 382 /* retrieve source in-memory inode to log */
383 else if (tlck->flag & tlckINODELOCK) { 383 else if (tlck->flag & tlckINODELOCK) {
384 if (tlck->type & tlckDTREE) 384 if (tlck->type & tlckDTREE)
385 p = (caddr_t) &JFS_IP(tlck->ip)->i_dtroot; 385 p = (caddr_t) &JFS_IP(tlck->ip)->i_dtroot;
386 else 386 else
387 p = (caddr_t) &JFS_IP(tlck->ip)->i_xtroot; 387 p = (caddr_t) &JFS_IP(tlck->ip)->i_xtroot;
388 linelock = (struct linelock *) & tlck->lock; 388 linelock = (struct linelock *) & tlck->lock;
389 } 389 }
390 #ifdef _JFS_WIP 390 #ifdef _JFS_WIP
391 else if (tlck->flag & tlckINLINELOCK) { 391 else if (tlck->flag & tlckINLINELOCK) {
392 392
393 inlinelock = (struct inlinelock *) & tlck; 393 inlinelock = (struct inlinelock *) & tlck;
394 p = (caddr_t) & inlinelock->pxd; 394 p = (caddr_t) & inlinelock->pxd;
395 linelock = (struct linelock *) & tlck; 395 linelock = (struct linelock *) & tlck;
396 } 396 }
397 #endif /* _JFS_WIP */ 397 #endif /* _JFS_WIP */
398 else { 398 else {
399 jfs_err("lmWriteRecord: UFO tlck:0x%p", tlck); 399 jfs_err("lmWriteRecord: UFO tlck:0x%p", tlck);
400 return 0; /* Probably should trap */ 400 return 0; /* Probably should trap */
401 } 401 }
402 l2linesize = linelock->l2linesize; 402 l2linesize = linelock->l2linesize;
403 403
404 moveData: 404 moveData:
405 ASSERT(linelock->index <= linelock->maxcnt); 405 ASSERT(linelock->index <= linelock->maxcnt);
406 406
407 lv = linelock->lv; 407 lv = linelock->lv;
408 for (i = 0; i < linelock->index; i++, lv++) { 408 for (i = 0; i < linelock->index; i++, lv++) {
409 if (lv->length == 0) 409 if (lv->length == 0)
410 continue; 410 continue;
411 411
412 /* is page full ? */ 412 /* is page full ? */
413 if (dstoffset >= LOGPSIZE - LOGPTLRSIZE) { 413 if (dstoffset >= LOGPSIZE - LOGPTLRSIZE) {
414 /* page become full: move on to next page */ 414 /* page become full: move on to next page */
415 lmNextPage(log); 415 lmNextPage(log);
416 416
417 bp = log->bp; 417 bp = log->bp;
418 lp = (struct logpage *) bp->l_ldata; 418 lp = (struct logpage *) bp->l_ldata;
419 dstoffset = LOGPHDRSIZE; 419 dstoffset = LOGPHDRSIZE;
420 } 420 }
421 421
422 /* 422 /*
423 * move log vector data 423 * move log vector data
424 */ 424 */
425 src = (u8 *) p + (lv->offset << l2linesize); 425 src = (u8 *) p + (lv->offset << l2linesize);
426 srclen = lv->length << l2linesize; 426 srclen = lv->length << l2linesize;
427 len += srclen; 427 len += srclen;
428 while (srclen > 0) { 428 while (srclen > 0) {
429 freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset; 429 freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
430 nbytes = min(freespace, srclen); 430 nbytes = min(freespace, srclen);
431 dst = (caddr_t) lp + dstoffset; 431 dst = (caddr_t) lp + dstoffset;
432 memcpy(dst, src, nbytes); 432 memcpy(dst, src, nbytes);
433 dstoffset += nbytes; 433 dstoffset += nbytes;
434 434
435 /* is page not full ? */ 435 /* is page not full ? */
436 if (dstoffset < LOGPSIZE - LOGPTLRSIZE) 436 if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
437 break; 437 break;
438 438
439 /* page become full: move on to next page */ 439 /* page become full: move on to next page */
440 lmNextPage(log); 440 lmNextPage(log);
441 441
442 bp = (struct lbuf *) log->bp; 442 bp = (struct lbuf *) log->bp;
443 lp = (struct logpage *) bp->l_ldata; 443 lp = (struct logpage *) bp->l_ldata;
444 dstoffset = LOGPHDRSIZE; 444 dstoffset = LOGPHDRSIZE;
445 445
446 srclen -= nbytes; 446 srclen -= nbytes;
447 src += nbytes; 447 src += nbytes;
448 } 448 }
449 449
450 /* 450 /*
451 * move log vector descriptor 451 * move log vector descriptor
452 */ 452 */
453 len += 4; 453 len += 4;
454 lvd = (struct lvd *) ((caddr_t) lp + dstoffset); 454 lvd = (struct lvd *) ((caddr_t) lp + dstoffset);
455 lvd->offset = cpu_to_le16(lv->offset); 455 lvd->offset = cpu_to_le16(lv->offset);
456 lvd->length = cpu_to_le16(lv->length); 456 lvd->length = cpu_to_le16(lv->length);
457 dstoffset += 4; 457 dstoffset += 4;
458 jfs_info("lmWriteRecord: lv offset:%d length:%d", 458 jfs_info("lmWriteRecord: lv offset:%d length:%d",
459 lv->offset, lv->length); 459 lv->offset, lv->length);
460 } 460 }
461 461
462 if ((i = linelock->next)) { 462 if ((i = linelock->next)) {
463 linelock = (struct linelock *) lid_to_tlock(i); 463 linelock = (struct linelock *) lid_to_tlock(i);
464 goto moveData; 464 goto moveData;
465 } 465 }
466 466
467 /* 467 /*
468 * move log record descriptor 468 * move log record descriptor
469 */ 469 */
470 moveLrd: 470 moveLrd:
471 lrd->length = cpu_to_le16(len); 471 lrd->length = cpu_to_le16(len);
472 472
473 src = (caddr_t) lrd; 473 src = (caddr_t) lrd;
474 srclen = LOGRDSIZE; 474 srclen = LOGRDSIZE;
475 475
476 while (srclen > 0) { 476 while (srclen > 0) {
477 freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset; 477 freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
478 nbytes = min(freespace, srclen); 478 nbytes = min(freespace, srclen);
479 dst = (caddr_t) lp + dstoffset; 479 dst = (caddr_t) lp + dstoffset;
480 memcpy(dst, src, nbytes); 480 memcpy(dst, src, nbytes);
481 481
482 dstoffset += nbytes; 482 dstoffset += nbytes;
483 srclen -= nbytes; 483 srclen -= nbytes;
484 484
485 /* are there more to move than freespace of page ? */ 485 /* are there more to move than freespace of page ? */
486 if (srclen) 486 if (srclen)
487 goto pageFull; 487 goto pageFull;
488 488
489 /* 489 /*
490 * end of log record descriptor 490 * end of log record descriptor
491 */ 491 */
492 492
493 /* update last log record eor */ 493 /* update last log record eor */
494 log->eor = dstoffset; 494 log->eor = dstoffset;
495 bp->l_eor = dstoffset; 495 bp->l_eor = dstoffset;
496 lsn = (log->page << L2LOGPSIZE) + dstoffset; 496 lsn = (log->page << L2LOGPSIZE) + dstoffset;
497 497
498 if (lrd->type & cpu_to_le16(LOG_COMMIT)) { 498 if (lrd->type & cpu_to_le16(LOG_COMMIT)) {
499 tblk->clsn = lsn; 499 tblk->clsn = lsn;
500 jfs_info("wr: tclsn:0x%x, beor:0x%x", tblk->clsn, 500 jfs_info("wr: tclsn:0x%x, beor:0x%x", tblk->clsn,
501 bp->l_eor); 501 bp->l_eor);
502 502
503 INCREMENT(lmStat.commit); /* # of commit */ 503 INCREMENT(lmStat.commit); /* # of commit */
504 504
505 /* 505 /*
506 * enqueue tblock for group commit: 506 * enqueue tblock for group commit:
507 * 507 *
508 * enqueue tblock of non-trivial/synchronous COMMIT 508 * enqueue tblock of non-trivial/synchronous COMMIT
509 * at tail of group commit queue 509 * at tail of group commit queue
510 * (trivial/asynchronous COMMITs are ignored by 510 * (trivial/asynchronous COMMITs are ignored by
511 * group commit.) 511 * group commit.)
512 */ 512 */
513 LOGGC_LOCK(log); 513 LOGGC_LOCK(log);
514 514
515 /* init tblock gc state */ 515 /* init tblock gc state */
516 tblk->flag = tblkGC_QUEUE; 516 tblk->flag = tblkGC_QUEUE;
517 tblk->bp = log->bp; 517 tblk->bp = log->bp;
518 tblk->pn = log->page; 518 tblk->pn = log->page;
519 tblk->eor = log->eor; 519 tblk->eor = log->eor;
520 520
521 /* enqueue transaction to commit queue */ 521 /* enqueue transaction to commit queue */
522 list_add_tail(&tblk->cqueue, &log->cqueue); 522 list_add_tail(&tblk->cqueue, &log->cqueue);
523 523
524 LOGGC_UNLOCK(log); 524 LOGGC_UNLOCK(log);
525 } 525 }
526 526
527 jfs_info("lmWriteRecord: lrd:0x%04x bp:0x%p pn:%d eor:0x%x", 527 jfs_info("lmWriteRecord: lrd:0x%04x bp:0x%p pn:%d eor:0x%x",
528 le16_to_cpu(lrd->type), log->bp, log->page, dstoffset); 528 le16_to_cpu(lrd->type), log->bp, log->page, dstoffset);
529 529
530 /* page not full ? */ 530 /* page not full ? */
531 if (dstoffset < LOGPSIZE - LOGPTLRSIZE) 531 if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
532 return lsn; 532 return lsn;
533 533
534 pageFull: 534 pageFull:
535 /* page become full: move on to next page */ 535 /* page become full: move on to next page */
536 lmNextPage(log); 536 lmNextPage(log);
537 537
538 bp = (struct lbuf *) log->bp; 538 bp = (struct lbuf *) log->bp;
539 lp = (struct logpage *) bp->l_ldata; 539 lp = (struct logpage *) bp->l_ldata;
540 dstoffset = LOGPHDRSIZE; 540 dstoffset = LOGPHDRSIZE;
541 src += nbytes; 541 src += nbytes;
542 } 542 }
543 543
544 return lsn; 544 return lsn;
545 } 545 }
546 546
547 547
548 /* 548 /*
549 * NAME: lmNextPage() 549 * NAME: lmNextPage()
550 * 550 *
551 * FUNCTION: write current page and allocate next page. 551 * FUNCTION: write current page and allocate next page.
552 * 552 *
553 * PARAMETER: log 553 * PARAMETER: log
554 * 554 *
555 * RETURN: 0 555 * RETURN: 0
556 * 556 *
557 * serialization: LOG_LOCK() held on entry/exit 557 * serialization: LOG_LOCK() held on entry/exit
558 */ 558 */
559 static int lmNextPage(struct jfs_log * log) 559 static int lmNextPage(struct jfs_log * log)
560 { 560 {
561 struct logpage *lp; 561 struct logpage *lp;
562 int lspn; /* log sequence page number */ 562 int lspn; /* log sequence page number */
563 int pn; /* current page number */ 563 int pn; /* current page number */
564 struct lbuf *bp; 564 struct lbuf *bp;
565 struct lbuf *nextbp; 565 struct lbuf *nextbp;
566 struct tblock *tblk; 566 struct tblock *tblk;
567 567
568 /* get current log page number and log sequence page number */ 568 /* get current log page number and log sequence page number */
569 pn = log->page; 569 pn = log->page;
570 bp = log->bp; 570 bp = log->bp;
571 lp = (struct logpage *) bp->l_ldata; 571 lp = (struct logpage *) bp->l_ldata;
572 lspn = le32_to_cpu(lp->h.page); 572 lspn = le32_to_cpu(lp->h.page);
573 573
574 LOGGC_LOCK(log); 574 LOGGC_LOCK(log);
575 575
576 /* 576 /*
577 * write or queue the full page at the tail of write queue 577 * write or queue the full page at the tail of write queue
578 */ 578 */
579 /* get the tail tblk on commit queue */ 579 /* get the tail tblk on commit queue */
580 if (list_empty(&log->cqueue)) 580 if (list_empty(&log->cqueue))
581 tblk = NULL; 581 tblk = NULL;
582 else 582 else
583 tblk = list_entry(log->cqueue.prev, struct tblock, cqueue); 583 tblk = list_entry(log->cqueue.prev, struct tblock, cqueue);
584 584
585 /* every tblk who has COMMIT record on the current page, 585 /* every tblk who has COMMIT record on the current page,
586 * and has not been committed, must be on commit queue 586 * and has not been committed, must be on commit queue
587 * since tblk is queued at commit queueu at the time 587 * since tblk is queued at commit queueu at the time
588 * of writing its COMMIT record on the page before 588 * of writing its COMMIT record on the page before
589 * page becomes full (even though the tblk thread 589 * page becomes full (even though the tblk thread
590 * who wrote COMMIT record may have been suspended 590 * who wrote COMMIT record may have been suspended
591 * currently); 591 * currently);
592 */ 592 */
593 593
594 /* is page bound with outstanding tail tblk ? */ 594 /* is page bound with outstanding tail tblk ? */
595 if (tblk && tblk->pn == pn) { 595 if (tblk && tblk->pn == pn) {
596 /* mark tblk for end-of-page */ 596 /* mark tblk for end-of-page */
597 tblk->flag |= tblkGC_EOP; 597 tblk->flag |= tblkGC_EOP;
598 598
599 if (log->cflag & logGC_PAGEOUT) { 599 if (log->cflag & logGC_PAGEOUT) {
600 /* if page is not already on write queue, 600 /* if page is not already on write queue,
601 * just enqueue (no lbmWRITE to prevent redrive) 601 * just enqueue (no lbmWRITE to prevent redrive)
602 * buffer to wqueue to ensure correct serial order 602 * buffer to wqueue to ensure correct serial order
603 * of the pages since log pages will be added 603 * of the pages since log pages will be added
604 * continuously 604 * continuously
605 */ 605 */
606 if (bp->l_wqnext == NULL) 606 if (bp->l_wqnext == NULL)
607 lbmWrite(log, bp, 0, 0); 607 lbmWrite(log, bp, 0, 0);
608 } else { 608 } else {
609 /* 609 /*
610 * No current GC leader, initiate group commit 610 * No current GC leader, initiate group commit
611 */ 611 */
612 log->cflag |= logGC_PAGEOUT; 612 log->cflag |= logGC_PAGEOUT;
613 lmGCwrite(log, 0); 613 lmGCwrite(log, 0);
614 } 614 }
615 } 615 }
616 /* page is not bound with outstanding tblk: 616 /* page is not bound with outstanding tblk:
617 * init write or mark it to be redriven (lbmWRITE) 617 * init write or mark it to be redriven (lbmWRITE)
618 */ 618 */
619 else { 619 else {
620 /* finalize the page */ 620 /* finalize the page */
621 bp->l_ceor = bp->l_eor; 621 bp->l_ceor = bp->l_eor;
622 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor); 622 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
623 lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, 0); 623 lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, 0);
624 } 624 }
625 LOGGC_UNLOCK(log); 625 LOGGC_UNLOCK(log);
626 626
627 /* 627 /*
628 * allocate/initialize next page 628 * allocate/initialize next page
629 */ 629 */
630 /* if log wraps, the first data page of log is 2 630 /* if log wraps, the first data page of log is 2
631 * (0 never used, 1 is superblock). 631 * (0 never used, 1 is superblock).
632 */ 632 */
633 log->page = (pn == log->size - 1) ? 2 : pn + 1; 633 log->page = (pn == log->size - 1) ? 2 : pn + 1;
634 log->eor = LOGPHDRSIZE; /* ? valid page empty/full at logRedo() */ 634 log->eor = LOGPHDRSIZE; /* ? valid page empty/full at logRedo() */
635 635
636 /* allocate/initialize next log page buffer */ 636 /* allocate/initialize next log page buffer */
637 nextbp = lbmAllocate(log, log->page); 637 nextbp = lbmAllocate(log, log->page);
638 nextbp->l_eor = log->eor; 638 nextbp->l_eor = log->eor;
639 log->bp = nextbp; 639 log->bp = nextbp;
640 640
641 /* initialize next log page */ 641 /* initialize next log page */
642 lp = (struct logpage *) nextbp->l_ldata; 642 lp = (struct logpage *) nextbp->l_ldata;
643 lp->h.page = lp->t.page = cpu_to_le32(lspn + 1); 643 lp->h.page = lp->t.page = cpu_to_le32(lspn + 1);
644 lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE); 644 lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);
645 645
646 return 0; 646 return 0;
647 } 647 }
648 648
649 649
650 /* 650 /*
651 * NAME: lmGroupCommit() 651 * NAME: lmGroupCommit()
652 * 652 *
653 * FUNCTION: group commit 653 * FUNCTION: group commit
654 * initiate pageout of the pages with COMMIT in the order of 654 * initiate pageout of the pages with COMMIT in the order of
655 * page number - redrive pageout of the page at the head of 655 * page number - redrive pageout of the page at the head of
656 * pageout queue until full page has been written. 656 * pageout queue until full page has been written.
657 * 657 *
658 * RETURN: 658 * RETURN:
659 * 659 *
660 * NOTE: 660 * NOTE:
661 * LOGGC_LOCK serializes log group commit queue, and 661 * LOGGC_LOCK serializes log group commit queue, and
662 * transaction blocks on the commit queue. 662 * transaction blocks on the commit queue.
663 * N.B. LOG_LOCK is NOT held during lmGroupCommit(). 663 * N.B. LOG_LOCK is NOT held during lmGroupCommit().
664 */ 664 */
665 int lmGroupCommit(struct jfs_log * log, struct tblock * tblk) 665 int lmGroupCommit(struct jfs_log * log, struct tblock * tblk)
666 { 666 {
667 int rc = 0; 667 int rc = 0;
668 668
669 LOGGC_LOCK(log); 669 LOGGC_LOCK(log);
670 670
671 /* group committed already ? */ 671 /* group committed already ? */
672 if (tblk->flag & tblkGC_COMMITTED) { 672 if (tblk->flag & tblkGC_COMMITTED) {
673 if (tblk->flag & tblkGC_ERROR) 673 if (tblk->flag & tblkGC_ERROR)
674 rc = -EIO; 674 rc = -EIO;
675 675
676 LOGGC_UNLOCK(log); 676 LOGGC_UNLOCK(log);
677 return rc; 677 return rc;
678 } 678 }
679 jfs_info("lmGroup Commit: tblk = 0x%p, gcrtc = %d", tblk, log->gcrtc); 679 jfs_info("lmGroup Commit: tblk = 0x%p, gcrtc = %d", tblk, log->gcrtc);
680 680
681 if (tblk->xflag & COMMIT_LAZY) 681 if (tblk->xflag & COMMIT_LAZY)
682 tblk->flag |= tblkGC_LAZY; 682 tblk->flag |= tblkGC_LAZY;
683 683
684 if ((!(log->cflag & logGC_PAGEOUT)) && (!list_empty(&log->cqueue)) && 684 if ((!(log->cflag & logGC_PAGEOUT)) && (!list_empty(&log->cqueue)) &&
685 (!(tblk->xflag & COMMIT_LAZY) || test_bit(log_FLUSH, &log->flag) 685 (!(tblk->xflag & COMMIT_LAZY) || test_bit(log_FLUSH, &log->flag)
686 || jfs_tlocks_low)) { 686 || jfs_tlocks_low)) {
687 /* 687 /*
688 * No pageout in progress 688 * No pageout in progress
689 * 689 *
690 * start group commit as its group leader. 690 * start group commit as its group leader.
691 */ 691 */
692 log->cflag |= logGC_PAGEOUT; 692 log->cflag |= logGC_PAGEOUT;
693 693
694 lmGCwrite(log, 0); 694 lmGCwrite(log, 0);
695 } 695 }
696 696
697 if (tblk->xflag & COMMIT_LAZY) { 697 if (tblk->xflag & COMMIT_LAZY) {
698 /* 698 /*
699 * Lazy transactions can leave now 699 * Lazy transactions can leave now
700 */ 700 */
701 LOGGC_UNLOCK(log); 701 LOGGC_UNLOCK(log);
702 return 0; 702 return 0;
703 } 703 }
704 704
705 /* lmGCwrite gives up LOGGC_LOCK, check again */ 705 /* lmGCwrite gives up LOGGC_LOCK, check again */
706 706
707 if (tblk->flag & tblkGC_COMMITTED) { 707 if (tblk->flag & tblkGC_COMMITTED) {
708 if (tblk->flag & tblkGC_ERROR) 708 if (tblk->flag & tblkGC_ERROR)
709 rc = -EIO; 709 rc = -EIO;
710 710
711 LOGGC_UNLOCK(log); 711 LOGGC_UNLOCK(log);
712 return rc; 712 return rc;
713 } 713 }
714 714
715 /* upcount transaction waiting for completion 715 /* upcount transaction waiting for completion
716 */ 716 */
717 log->gcrtc++; 717 log->gcrtc++;
718 tblk->flag |= tblkGC_READY; 718 tblk->flag |= tblkGC_READY;
719 719
720 __SLEEP_COND(tblk->gcwait, (tblk->flag & tblkGC_COMMITTED), 720 __SLEEP_COND(tblk->gcwait, (tblk->flag & tblkGC_COMMITTED),
721 LOGGC_LOCK(log), LOGGC_UNLOCK(log)); 721 LOGGC_LOCK(log), LOGGC_UNLOCK(log));
722 722
723 /* removed from commit queue */ 723 /* removed from commit queue */
724 if (tblk->flag & tblkGC_ERROR) 724 if (tblk->flag & tblkGC_ERROR)
725 rc = -EIO; 725 rc = -EIO;
726 726
727 LOGGC_UNLOCK(log); 727 LOGGC_UNLOCK(log);
728 return rc; 728 return rc;
729 } 729 }
730 730
731 /* 731 /*
732 * NAME: lmGCwrite() 732 * NAME: lmGCwrite()
733 * 733 *
734 * FUNCTION: group commit write 734 * FUNCTION: group commit write
735 * initiate write of log page, building a group of all transactions 735 * initiate write of log page, building a group of all transactions
736 * with commit records on that page. 736 * with commit records on that page.
737 * 737 *
738 * RETURN: None 738 * RETURN: None
739 * 739 *
740 * NOTE: 740 * NOTE:
741 * LOGGC_LOCK must be held by caller. 741 * LOGGC_LOCK must be held by caller.
742 * N.B. LOG_LOCK is NOT held during lmGroupCommit(). 742 * N.B. LOG_LOCK is NOT held during lmGroupCommit().
743 */ 743 */
744 static void lmGCwrite(struct jfs_log * log, int cant_write) 744 static void lmGCwrite(struct jfs_log * log, int cant_write)
745 { 745 {
746 struct lbuf *bp; 746 struct lbuf *bp;
747 struct logpage *lp; 747 struct logpage *lp;
748 int gcpn; /* group commit page number */ 748 int gcpn; /* group commit page number */
749 struct tblock *tblk; 749 struct tblock *tblk;
750 struct tblock *xtblk = NULL; 750 struct tblock *xtblk = NULL;
751 751
752 /* 752 /*
753 * build the commit group of a log page 753 * build the commit group of a log page
754 * 754 *
755 * scan commit queue and make a commit group of all 755 * scan commit queue and make a commit group of all
756 * transactions with COMMIT records on the same log page. 756 * transactions with COMMIT records on the same log page.
757 */ 757 */
758 /* get the head tblk on the commit queue */ 758 /* get the head tblk on the commit queue */
759 gcpn = list_entry(log->cqueue.next, struct tblock, cqueue)->pn; 759 gcpn = list_entry(log->cqueue.next, struct tblock, cqueue)->pn;
760 760
761 list_for_each_entry(tblk, &log->cqueue, cqueue) { 761 list_for_each_entry(tblk, &log->cqueue, cqueue) {
762 if (tblk->pn != gcpn) 762 if (tblk->pn != gcpn)
763 break; 763 break;
764 764
765 xtblk = tblk; 765 xtblk = tblk;
766 766
767 /* state transition: (QUEUE, READY) -> COMMIT */ 767 /* state transition: (QUEUE, READY) -> COMMIT */
768 tblk->flag |= tblkGC_COMMIT; 768 tblk->flag |= tblkGC_COMMIT;
769 } 769 }
770 tblk = xtblk; /* last tblk of the page */ 770 tblk = xtblk; /* last tblk of the page */
771 771
772 /* 772 /*
773 * pageout to commit transactions on the log page. 773 * pageout to commit transactions on the log page.
774 */ 774 */
775 bp = (struct lbuf *) tblk->bp; 775 bp = (struct lbuf *) tblk->bp;
776 lp = (struct logpage *) bp->l_ldata; 776 lp = (struct logpage *) bp->l_ldata;
777 /* is page already full ? */ 777 /* is page already full ? */
778 if (tblk->flag & tblkGC_EOP) { 778 if (tblk->flag & tblkGC_EOP) {
779 /* mark page to free at end of group commit of the page */ 779 /* mark page to free at end of group commit of the page */
780 tblk->flag &= ~tblkGC_EOP; 780 tblk->flag &= ~tblkGC_EOP;
781 tblk->flag |= tblkGC_FREE; 781 tblk->flag |= tblkGC_FREE;
782 bp->l_ceor = bp->l_eor; 782 bp->l_ceor = bp->l_eor;
783 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor); 783 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
784 lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmGC, 784 lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmGC,
785 cant_write); 785 cant_write);
786 INCREMENT(lmStat.full_page); 786 INCREMENT(lmStat.full_page);
787 } 787 }
788 /* page is not yet full */ 788 /* page is not yet full */
789 else { 789 else {
790 bp->l_ceor = tblk->eor; /* ? bp->l_ceor = bp->l_eor; */ 790 bp->l_ceor = tblk->eor; /* ? bp->l_ceor = bp->l_eor; */
791 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor); 791 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
792 lbmWrite(log, bp, lbmWRITE | lbmGC, cant_write); 792 lbmWrite(log, bp, lbmWRITE | lbmGC, cant_write);
793 INCREMENT(lmStat.partial_page); 793 INCREMENT(lmStat.partial_page);
794 } 794 }
795 } 795 }
796 796
797 /* 797 /*
798 * NAME: lmPostGC() 798 * NAME: lmPostGC()
799 * 799 *
800 * FUNCTION: group commit post-processing 800 * FUNCTION: group commit post-processing
801 * Processes transactions after their commit records have been written 801 * Processes transactions after their commit records have been written
802 * to disk, redriving log I/O if necessary. 802 * to disk, redriving log I/O if necessary.
803 * 803 *
804 * RETURN: None 804 * RETURN: None
805 * 805 *
806 * NOTE: 806 * NOTE:
807 * This routine is called a interrupt time by lbmIODone 807 * This routine is called a interrupt time by lbmIODone
808 */ 808 */
809 static void lmPostGC(struct lbuf * bp) 809 static void lmPostGC(struct lbuf * bp)
810 { 810 {
811 unsigned long flags; 811 unsigned long flags;
812 struct jfs_log *log = bp->l_log; 812 struct jfs_log *log = bp->l_log;
813 struct logpage *lp; 813 struct logpage *lp;
814 struct tblock *tblk, *temp; 814 struct tblock *tblk, *temp;
815 815
816 //LOGGC_LOCK(log); 816 //LOGGC_LOCK(log);
817 spin_lock_irqsave(&log->gclock, flags); 817 spin_lock_irqsave(&log->gclock, flags);
818 /* 818 /*
819 * current pageout of group commit completed. 819 * current pageout of group commit completed.
820 * 820 *
821 * remove/wakeup transactions from commit queue who were 821 * remove/wakeup transactions from commit queue who were
822 * group committed with the current log page 822 * group committed with the current log page
823 */ 823 */
824 list_for_each_entry_safe(tblk, temp, &log->cqueue, cqueue) { 824 list_for_each_entry_safe(tblk, temp, &log->cqueue, cqueue) {
825 if (!(tblk->flag & tblkGC_COMMIT)) 825 if (!(tblk->flag & tblkGC_COMMIT))
826 break; 826 break;
827 /* if transaction was marked GC_COMMIT then 827 /* if transaction was marked GC_COMMIT then
828 * it has been shipped in the current pageout 828 * it has been shipped in the current pageout
829 * and made it to disk - it is committed. 829 * and made it to disk - it is committed.
830 */ 830 */
831 831
832 if (bp->l_flag & lbmERROR) 832 if (bp->l_flag & lbmERROR)
833 tblk->flag |= tblkGC_ERROR; 833 tblk->flag |= tblkGC_ERROR;
834 834
835 /* remove it from the commit queue */ 835 /* remove it from the commit queue */
836 list_del(&tblk->cqueue); 836 list_del(&tblk->cqueue);
837 tblk->flag &= ~tblkGC_QUEUE; 837 tblk->flag &= ~tblkGC_QUEUE;
838 838
839 if (tblk == log->flush_tblk) { 839 if (tblk == log->flush_tblk) {
840 /* we can stop flushing the log now */ 840 /* we can stop flushing the log now */
841 clear_bit(log_FLUSH, &log->flag); 841 clear_bit(log_FLUSH, &log->flag);
842 log->flush_tblk = NULL; 842 log->flush_tblk = NULL;
843 } 843 }
844 844
845 jfs_info("lmPostGC: tblk = 0x%p, flag = 0x%x", tblk, 845 jfs_info("lmPostGC: tblk = 0x%p, flag = 0x%x", tblk,
846 tblk->flag); 846 tblk->flag);
847 847
848 if (!(tblk->xflag & COMMIT_FORCE)) 848 if (!(tblk->xflag & COMMIT_FORCE))
849 /* 849 /*
850 * Hand tblk over to lazy commit thread 850 * Hand tblk over to lazy commit thread
851 */ 851 */
852 txLazyUnlock(tblk); 852 txLazyUnlock(tblk);
853 else { 853 else {
854 /* state transition: COMMIT -> COMMITTED */ 854 /* state transition: COMMIT -> COMMITTED */
855 tblk->flag |= tblkGC_COMMITTED; 855 tblk->flag |= tblkGC_COMMITTED;
856 856
857 if (tblk->flag & tblkGC_READY) 857 if (tblk->flag & tblkGC_READY)
858 log->gcrtc--; 858 log->gcrtc--;
859 859
860 LOGGC_WAKEUP(tblk); 860 LOGGC_WAKEUP(tblk);
861 } 861 }
862 862
863 /* was page full before pageout ? 863 /* was page full before pageout ?
864 * (and this is the last tblk bound with the page) 864 * (and this is the last tblk bound with the page)
865 */ 865 */
866 if (tblk->flag & tblkGC_FREE) 866 if (tblk->flag & tblkGC_FREE)
867 lbmFree(bp); 867 lbmFree(bp);
868 /* did page become full after pageout ? 868 /* did page become full after pageout ?
869 * (and this is the last tblk bound with the page) 869 * (and this is the last tblk bound with the page)
870 */ 870 */
871 else if (tblk->flag & tblkGC_EOP) { 871 else if (tblk->flag & tblkGC_EOP) {
872 /* finalize the page */ 872 /* finalize the page */
873 lp = (struct logpage *) bp->l_ldata; 873 lp = (struct logpage *) bp->l_ldata;
874 bp->l_ceor = bp->l_eor; 874 bp->l_ceor = bp->l_eor;
875 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor); 875 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
876 jfs_info("lmPostGC: calling lbmWrite"); 876 jfs_info("lmPostGC: calling lbmWrite");
877 lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, 877 lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE,
878 1); 878 1);
879 } 879 }
880 880
881 } 881 }
882 882
883 /* are there any transactions who have entered lnGroupCommit() 883 /* are there any transactions who have entered lnGroupCommit()
884 * (whose COMMITs are after that of the last log page written. 884 * (whose COMMITs are after that of the last log page written.
885 * They are waiting for new group commit (above at (SLEEP 1)) 885 * They are waiting for new group commit (above at (SLEEP 1))
886 * or lazy transactions are on a full (queued) log page, 886 * or lazy transactions are on a full (queued) log page,
887 * select the latest ready transaction as new group leader and 887 * select the latest ready transaction as new group leader and
888 * wake her up to lead her group. 888 * wake her up to lead her group.
889 */ 889 */
890 if ((!list_empty(&log->cqueue)) && 890 if ((!list_empty(&log->cqueue)) &&
891 ((log->gcrtc > 0) || (tblk->bp->l_wqnext != NULL) || 891 ((log->gcrtc > 0) || (tblk->bp->l_wqnext != NULL) ||
892 test_bit(log_FLUSH, &log->flag) || jfs_tlocks_low)) 892 test_bit(log_FLUSH, &log->flag) || jfs_tlocks_low))
893 /* 893 /*
894 * Call lmGCwrite with new group leader 894 * Call lmGCwrite with new group leader
895 */ 895 */
896 lmGCwrite(log, 1); 896 lmGCwrite(log, 1);
897 897
898 /* no transaction are ready yet (transactions are only just 898 /* no transaction are ready yet (transactions are only just
899 * queued (GC_QUEUE) and not entered for group commit yet). 899 * queued (GC_QUEUE) and not entered for group commit yet).
900 * the first transaction entering group commit 900 * the first transaction entering group commit
901 * will elect herself as new group leader. 901 * will elect herself as new group leader.
902 */ 902 */
903 else 903 else
904 log->cflag &= ~logGC_PAGEOUT; 904 log->cflag &= ~logGC_PAGEOUT;
905 905
906 //LOGGC_UNLOCK(log); 906 //LOGGC_UNLOCK(log);
907 spin_unlock_irqrestore(&log->gclock, flags); 907 spin_unlock_irqrestore(&log->gclock, flags);
908 return; 908 return;
909 } 909 }
910 910
911 /* 911 /*
912 * NAME: lmLogSync() 912 * NAME: lmLogSync()
913 * 913 *
914 * FUNCTION: write log SYNCPT record for specified log 914 * FUNCTION: write log SYNCPT record for specified log
915 * if new sync address is available 915 * if new sync address is available
916 * (normally the case if sync() is executed by back-ground 916 * (normally the case if sync() is executed by back-ground
917 * process). 917 * process).
918 * if not, explicitly run jfs_blogsync() to initiate
919 * getting of new sync address.
920 * calculate new value of i_nextsync which determines when 918 * calculate new value of i_nextsync which determines when
921 * this code is called again. 919 * this code is called again.
922 * 920 *
923 * PARAMETERS: log - log structure 921 * PARAMETERS: log - log structure
924 * nosyncwait - 1 if called asynchronously 922 * hard_sync - 1 to force all metadata to be written
925 * 923 *
926 * RETURN: 0 924 * RETURN: 0
927 * 925 *
928 * serialization: LOG_LOCK() held on entry/exit 926 * serialization: LOG_LOCK() held on entry/exit
929 */ 927 */
930 static int lmLogSync(struct jfs_log * log, int nosyncwait) 928 static int lmLogSync(struct jfs_log * log, int hard_sync)
931 { 929 {
932 int logsize; 930 int logsize;
933 int written; /* written since last syncpt */ 931 int written; /* written since last syncpt */
934 int free; /* free space left available */ 932 int free; /* free space left available */
935 int delta; /* additional delta to write normally */ 933 int delta; /* additional delta to write normally */
936 int more; /* additional write granted */ 934 int more; /* additional write granted */
937 struct lrd lrd; 935 struct lrd lrd;
938 int lsn; 936 int lsn;
939 struct logsyncblk *lp; 937 struct logsyncblk *lp;
940 struct jfs_sb_info *sbi; 938 struct jfs_sb_info *sbi;
941 unsigned long flags; 939 unsigned long flags;
942 940
943 /* push dirty metapages out to disk */ 941 /* push dirty metapages out to disk */
944 list_for_each_entry(sbi, &log->sb_list, log_list) { 942 if (hard_sync)
945 filemap_flush(sbi->ipbmap->i_mapping); 943 list_for_each_entry(sbi, &log->sb_list, log_list) {
946 filemap_flush(sbi->ipimap->i_mapping); 944 filemap_fdatawrite(sbi->ipbmap->i_mapping);
947 filemap_flush(sbi->direct_inode->i_mapping); 945 filemap_fdatawrite(sbi->ipimap->i_mapping);
948 } 946 filemap_fdatawrite(sbi->direct_inode->i_mapping);
947 }
948 else
949 list_for_each_entry(sbi, &log->sb_list, log_list) {
950 filemap_flush(sbi->ipbmap->i_mapping);
951 filemap_flush(sbi->ipimap->i_mapping);
952 filemap_flush(sbi->direct_inode->i_mapping);
953 }
949 954
950 /* 955 /*
951 * forward syncpt 956 * forward syncpt
952 */ 957 */
953 /* if last sync is same as last syncpt, 958 /* if last sync is same as last syncpt,
954 * invoke sync point forward processing to update sync. 959 * invoke sync point forward processing to update sync.
955 */ 960 */
956 961
957 if (log->sync == log->syncpt) { 962 if (log->sync == log->syncpt) {
958 LOGSYNC_LOCK(log, flags); 963 LOGSYNC_LOCK(log, flags);
959 if (list_empty(&log->synclist)) 964 if (list_empty(&log->synclist))
960 log->sync = log->lsn; 965 log->sync = log->lsn;
961 else { 966 else {
962 lp = list_entry(log->synclist.next, 967 lp = list_entry(log->synclist.next,
963 struct logsyncblk, synclist); 968 struct logsyncblk, synclist);
964 log->sync = lp->lsn; 969 log->sync = lp->lsn;
965 } 970 }
966 LOGSYNC_UNLOCK(log, flags); 971 LOGSYNC_UNLOCK(log, flags);
967 972
968 } 973 }
969 974
970 /* if sync is different from last syncpt, 975 /* if sync is different from last syncpt,
971 * write a SYNCPT record with syncpt = sync. 976 * write a SYNCPT record with syncpt = sync.
972 * reset syncpt = sync 977 * reset syncpt = sync
973 */ 978 */
974 if (log->sync != log->syncpt) { 979 if (log->sync != log->syncpt) {
975 lrd.logtid = 0; 980 lrd.logtid = 0;
976 lrd.backchain = 0; 981 lrd.backchain = 0;
977 lrd.type = cpu_to_le16(LOG_SYNCPT); 982 lrd.type = cpu_to_le16(LOG_SYNCPT);
978 lrd.length = 0; 983 lrd.length = 0;
979 lrd.log.syncpt.sync = cpu_to_le32(log->sync); 984 lrd.log.syncpt.sync = cpu_to_le32(log->sync);
980 lsn = lmWriteRecord(log, NULL, &lrd, NULL); 985 lsn = lmWriteRecord(log, NULL, &lrd, NULL);
981 986
982 log->syncpt = log->sync; 987 log->syncpt = log->sync;
983 } else 988 } else
984 lsn = log->lsn; 989 lsn = log->lsn;
985 990
986 /* 991 /*
987 * setup next syncpt trigger (SWAG) 992 * setup next syncpt trigger (SWAG)
988 */ 993 */
989 logsize = log->logsize; 994 logsize = log->logsize;
990 995
991 logdiff(written, lsn, log); 996 logdiff(written, lsn, log);
992 free = logsize - written; 997 free = logsize - written;
993 delta = LOGSYNC_DELTA(logsize); 998 delta = LOGSYNC_DELTA(logsize);
994 more = min(free / 2, delta); 999 more = min(free / 2, delta);
995 if (more < 2 * LOGPSIZE) { 1000 if (more < 2 * LOGPSIZE) {
996 jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n"); 1001 jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n");
997 /* 1002 /*
998 * log wrapping 1003 * log wrapping
999 * 1004 *
1000 * option 1 - panic ? No.! 1005 * option 1 - panic ? No.!
1001 * option 2 - shutdown file systems 1006 * option 2 - shutdown file systems
1002 * associated with log ? 1007 * associated with log ?
1003 * option 3 - extend log ? 1008 * option 3 - extend log ?
1004 */ 1009 */
1005 /* 1010 /*
1006 * option 4 - second chance 1011 * option 4 - second chance
1007 * 1012 *
1008 * mark log wrapped, and continue. 1013 * mark log wrapped, and continue.
1009 * when all active transactions are completed, 1014 * when all active transactions are completed,
1010 * mark log valid for recovery. 1015 * mark log valid for recovery.
1011 * if crashed during invalid state, log state 1016 * if crashed during invalid state, log state
1012 * implies invalid log, forcing fsck(). 1017 * implies invalid log, forcing fsck().
1013 */ 1018 */
1014 /* mark log state log wrap in log superblock */ 1019 /* mark log state log wrap in log superblock */
1015 /* log->state = LOGWRAP; */ 1020 /* log->state = LOGWRAP; */
1016 1021
1017 /* reset sync point computation */ 1022 /* reset sync point computation */
1018 log->syncpt = log->sync = lsn; 1023 log->syncpt = log->sync = lsn;
1019 log->nextsync = delta; 1024 log->nextsync = delta;
1020 } else 1025 } else
1021 /* next syncpt trigger = written + more */ 1026 /* next syncpt trigger = written + more */
1022 log->nextsync = written + more; 1027 log->nextsync = written + more;
1023 1028
1024 /* return if lmLogSync() from outside of transaction, e.g., sync() */
1025 if (nosyncwait)
1026 return lsn;
1027
1028 /* if number of bytes written from last sync point is more 1029 /* if number of bytes written from last sync point is more
1029 * than 1/4 of the log size, stop new transactions from 1030 * than 1/4 of the log size, stop new transactions from
1030 * starting until all current transactions are completed 1031 * starting until all current transactions are completed
1031 * by setting syncbarrier flag. 1032 * by setting syncbarrier flag.
1032 */ 1033 */
1033 if (!test_bit(log_SYNCBARRIER, &log->flag) && 1034 if (!test_bit(log_SYNCBARRIER, &log->flag) &&
1034 (written > LOGSYNC_BARRIER(logsize)) && log->active) { 1035 (written > LOGSYNC_BARRIER(logsize)) && log->active) {
1035 set_bit(log_SYNCBARRIER, &log->flag); 1036 set_bit(log_SYNCBARRIER, &log->flag);
1036 jfs_info("log barrier on: lsn=0x%x syncpt=0x%x", lsn, 1037 jfs_info("log barrier on: lsn=0x%x syncpt=0x%x", lsn,
1037 log->syncpt); 1038 log->syncpt);
1038 /* 1039 /*
1039 * We may have to initiate group commit 1040 * We may have to initiate group commit
1040 */ 1041 */
1041 jfs_flush_journal(log, 0); 1042 jfs_flush_journal(log, 0);
1042 } 1043 }
1043 1044
1044 return lsn; 1045 return lsn;
1045 } 1046 }
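
The syncpt-trigger arithmetic above (written, free, delta, more) is compact enough to model directly. Below is a minimal user-space sketch of the computation; the LOGPSIZE and LOGSYNC_DELTA values here are stand-ins chosen for illustration, not the kernel's actual definitions.

/* Model of the "next syncpt trigger" (SWAG) computation in lmLogSync().
 * Stand-in constants: the real LOGPSIZE/LOGSYNC_DELTA live in JFS headers. */
#include <stdio.h>

#define LOGPSIZE 4096
#define LOGSYNC_DELTA(size) ((size) / 8)	/* assumed ratio for the sketch */

static int next_sync_trigger(int logsize, int written)
{
	int free = logsize - written;
	int delta = LOGSYNC_DELTA(logsize);
	int more = free / 2 < delta ? free / 2 : delta;

	if (more < 2 * LOGPSIZE)	/* log wrap: restart from delta */
		return delta;
	return written + more;		/* normal case */
}

int main(void)
{
	/* e.g. a 32MB log with 30MB written since the last sync point */
	printf("%d\n", next_sync_trigger(32 << 20, 30 << 20));
	return 0;
}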
1046 1047
1047 /* 1048 /*
1048 * NAME: jfs_syncpt 1049 * NAME: jfs_syncpt
1049 * 1050 *
1050 * FUNCTION: write log SYNCPT record for specified log 1051 * FUNCTION: write log SYNCPT record for specified log
1051 * 1052 *
1052 * PARAMETERS: log - log structure 1053 * PARAMETERS: log - log structure
1054 * hard_sync - set to 1 to force metadata to be written
1053 */ 1055 */
1054 void jfs_syncpt(struct jfs_log *log) 1056 void jfs_syncpt(struct jfs_log *log, int hard_sync)
1055 { LOG_LOCK(log); 1057 { LOG_LOCK(log);
1056 lmLogSync(log, 1); 1058 lmLogSync(log, hard_sync);
1057 LOG_UNLOCK(log); 1059 LOG_UNLOCK(log);
1058 } 1060 }
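
The new hard_sync argument ultimately selects between the two writeback flavors at the top of lmLogSync(): filemap_fdatawrite() starts the metadata I/O immediately, while filemap_flush() only schedules it. A hedged user-space model of that dispatch, with stand-in types and prints in place of the real filemap_* calls:

/* Sketch only: `struct mapping`, fdatawrite() and flush() stand in for
 * address_space and the kernel filemap_* helpers. */
#include <stdio.h>

struct mapping { const char *name; };

static void fdatawrite(struct mapping *m) { printf("write %s now\n", m->name); }
static void flush(struct mapping *m)      { printf("schedule %s\n", m->name); }

static void sync_mappings(struct mapping **maps, int n, int hard_sync)
{
	void (*writer)(struct mapping *) = hard_sync ? fdatawrite : flush;

	for (int i = 0; i < n; i++)
		writer(maps[i]);
}

int main(void)
{
	struct mapping bmap = { "ipbmap" }, imap = { "ipimap" };
	struct mapping *maps[] = { &bmap, &imap };

	sync_mappings(maps, 2, 1);	/* hard_sync set: write immediately */
	return 0;
}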
1059 1061
1060 /* 1062 /*
1061 * NAME: lmLogOpen() 1063 * NAME: lmLogOpen()
1062 * 1064 *
1063 * FUNCTION: open the log on first open; 1065 * FUNCTION: open the log on first open;
1064 * insert filesystem in the active list of the log. 1066 * insert filesystem in the active list of the log.
1065 * 1067 *
1066 * PARAMETER: ipmnt - file system mount inode 1068 * PARAMETER: ipmnt - file system mount inode
1067 * iplog - log inode (out) 1069 * iplog - log inode (out)
1068 * 1070 *
1069 * RETURN: 1071 * RETURN:
1070 * 1072 *
1071 * serialization: 1073 * serialization:
1072 */ 1074 */
1073 int lmLogOpen(struct super_block *sb) 1075 int lmLogOpen(struct super_block *sb)
1074 { 1076 {
1075 int rc; 1077 int rc;
1076 struct block_device *bdev; 1078 struct block_device *bdev;
1077 struct jfs_log *log; 1079 struct jfs_log *log;
1078 struct jfs_sb_info *sbi = JFS_SBI(sb); 1080 struct jfs_sb_info *sbi = JFS_SBI(sb);
1079 1081
1080 if (sbi->flag & JFS_NOINTEGRITY) 1082 if (sbi->flag & JFS_NOINTEGRITY)
1081 return open_dummy_log(sb); 1083 return open_dummy_log(sb);
1082 1084
1083 if (sbi->mntflag & JFS_INLINELOG) 1085 if (sbi->mntflag & JFS_INLINELOG)
1084 return open_inline_log(sb); 1086 return open_inline_log(sb);
1085 1087
1086 down(&jfs_log_sem); 1088 down(&jfs_log_sem);
1087 list_for_each_entry(log, &jfs_external_logs, journal_list) { 1089 list_for_each_entry(log, &jfs_external_logs, journal_list) {
1088 if (log->bdev->bd_dev == sbi->logdev) { 1090 if (log->bdev->bd_dev == sbi->logdev) {
1089 if (memcmp(log->uuid, sbi->loguuid, 1091 if (memcmp(log->uuid, sbi->loguuid,
1090 sizeof(log->uuid))) { 1092 sizeof(log->uuid))) {
1091 jfs_warn("wrong uuid on JFS journal\n"); 1093 jfs_warn("wrong uuid on JFS journal\n");
1092 up(&jfs_log_sem); 1094 up(&jfs_log_sem);
1093 return -EINVAL; 1095 return -EINVAL;
1094 } 1096 }
1095 /* 1097 /*
1096 * add file system to log active file system list 1098 * add file system to log active file system list
1097 */ 1099 */
1098 if ((rc = lmLogFileSystem(log, sbi, 1))) { 1100 if ((rc = lmLogFileSystem(log, sbi, 1))) {
1099 up(&jfs_log_sem); 1101 up(&jfs_log_sem);
1100 return rc; 1102 return rc;
1101 } 1103 }
1102 goto journal_found; 1104 goto journal_found;
1103 } 1105 }
1104 } 1106 }
1105 1107
1106 if (!(log = kmalloc(sizeof(struct jfs_log), GFP_KERNEL))) { 1108 if (!(log = kmalloc(sizeof(struct jfs_log), GFP_KERNEL))) {
1107 up(&jfs_log_sem); 1109 up(&jfs_log_sem);
1108 return -ENOMEM; 1110 return -ENOMEM;
1109 } 1111 }
1110 memset(log, 0, sizeof(struct jfs_log)); 1112 memset(log, 0, sizeof(struct jfs_log));
1111 INIT_LIST_HEAD(&log->sb_list); 1113 INIT_LIST_HEAD(&log->sb_list);
1112 init_waitqueue_head(&log->syncwait); 1114 init_waitqueue_head(&log->syncwait);
1113 1115
1114 /* 1116 /*
1115 * external log as separate logical volume 1117 * external log as separate logical volume
1116 * 1118 *
1117 * file systems to log may have n-to-1 relationship; 1119 * file systems to log may have n-to-1 relationship;
1118 */ 1120 */
1119 1121
1120 bdev = open_by_devnum(sbi->logdev, FMODE_READ|FMODE_WRITE); 1122 bdev = open_by_devnum(sbi->logdev, FMODE_READ|FMODE_WRITE);
1121 if (IS_ERR(bdev)) { 1123 if (IS_ERR(bdev)) {
1122 rc = -PTR_ERR(bdev); 1124 rc = -PTR_ERR(bdev);
1123 goto free; 1125 goto free;
1124 } 1126 }
1125 1127
1126 if ((rc = bd_claim(bdev, log))) { 1128 if ((rc = bd_claim(bdev, log))) {
1127 goto close; 1129 goto close;
1128 } 1130 }
1129 1131
1130 log->bdev = bdev; 1132 log->bdev = bdev;
1131 memcpy(log->uuid, sbi->loguuid, sizeof(log->uuid)); 1133 memcpy(log->uuid, sbi->loguuid, sizeof(log->uuid));
1132 1134
1133 /* 1135 /*
1134 * initialize log: 1136 * initialize log:
1135 */ 1137 */
1136 if ((rc = lmLogInit(log))) 1138 if ((rc = lmLogInit(log)))
1137 goto unclaim; 1139 goto unclaim;
1138 1140
1139 list_add(&log->journal_list, &jfs_external_logs); 1141 list_add(&log->journal_list, &jfs_external_logs);
1140 1142
1141 /* 1143 /*
1142 * add file system to log active file system list 1144 * add file system to log active file system list
1143 */ 1145 */
1144 if ((rc = lmLogFileSystem(log, sbi, 1))) 1146 if ((rc = lmLogFileSystem(log, sbi, 1)))
1145 goto shutdown; 1147 goto shutdown;
1146 1148
1147 journal_found: 1149 journal_found:
1148 LOG_LOCK(log); 1150 LOG_LOCK(log);
1149 list_add(&sbi->log_list, &log->sb_list); 1151 list_add(&sbi->log_list, &log->sb_list);
1150 sbi->log = log; 1152 sbi->log = log;
1151 LOG_UNLOCK(log); 1153 LOG_UNLOCK(log);
1152 1154
1153 up(&jfs_log_sem); 1155 up(&jfs_log_sem);
1154 return 0; 1156 return 0;
1155 1157
1156 /* 1158 /*
1157 * unwind on error 1159 * unwind on error
1158 */ 1160 */
1159 shutdown: /* unwind lbmLogInit() */ 1161 shutdown: /* unwind lbmLogInit() */
1160 list_del(&log->journal_list); 1162 list_del(&log->journal_list);
1161 lbmLogShutdown(log); 1163 lbmLogShutdown(log);
1162 1164
1163 unclaim: 1165 unclaim:
1164 bd_release(bdev); 1166 bd_release(bdev);
1165 1167
1166 close: /* close external log device */ 1168 close: /* close external log device */
1167 blkdev_put(bdev); 1169 blkdev_put(bdev);
1168 1170
1169 free: /* free log descriptor */ 1171 free: /* free log descriptor */
1170 up(&jfs_log_sem); 1172 up(&jfs_log_sem);
1171 kfree(log); 1173 kfree(log);
1172 1174
1173 jfs_warn("lmLogOpen: exit(%d)", rc); 1175 jfs_warn("lmLogOpen: exit(%d)", rc);
1174 return rc; 1176 return rc;
1175 } 1177 }
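
The unwind labels at the end of lmLogOpen() (shutdown, unclaim, close, free) release resources in the reverse order they were acquired, the standard kernel error-handling shape. A self-contained sketch of the same pattern, with a hypothetical acquire() step:

/* Two-step acquisition with goto-based unwind; acquire() is a stand-in. */
#include <stdlib.h>

struct res { int dummy; };

static struct res *acquire(void) { return malloc(sizeof(struct res)); }

static int open_two(struct res **a, struct res **b)
{
	if (!(*a = acquire()))
		goto fail;		/* nothing to undo yet */
	if (!(*b = acquire()))
		goto undo_a;		/* undo step 1 only */
	return 0;

undo_a:					/* reverse steps newest-first */
	free(*a);
	*a = NULL;
fail:
	return -1;
}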
1176 1178
1177 static int open_inline_log(struct super_block *sb) 1179 static int open_inline_log(struct super_block *sb)
1178 { 1180 {
1179 struct jfs_log *log; 1181 struct jfs_log *log;
1180 int rc; 1182 int rc;
1181 1183
1182 if (!(log = kmalloc(sizeof(struct jfs_log), GFP_KERNEL))) 1184 if (!(log = kmalloc(sizeof(struct jfs_log), GFP_KERNEL)))
1183 return -ENOMEM; 1185 return -ENOMEM;
1184 memset(log, 0, sizeof(struct jfs_log)); 1186 memset(log, 0, sizeof(struct jfs_log));
1185 INIT_LIST_HEAD(&log->sb_list); 1187 INIT_LIST_HEAD(&log->sb_list);
1186 init_waitqueue_head(&log->syncwait); 1188 init_waitqueue_head(&log->syncwait);
1187 1189
1188 set_bit(log_INLINELOG, &log->flag); 1190 set_bit(log_INLINELOG, &log->flag);
1189 log->bdev = sb->s_bdev; 1191 log->bdev = sb->s_bdev;
1190 log->base = addressPXD(&JFS_SBI(sb)->logpxd); 1192 log->base = addressPXD(&JFS_SBI(sb)->logpxd);
1191 log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >> 1193 log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >>
1192 (L2LOGPSIZE - sb->s_blocksize_bits); 1194 (L2LOGPSIZE - sb->s_blocksize_bits);
1193 log->l2bsize = sb->s_blocksize_bits; 1195 log->l2bsize = sb->s_blocksize_bits;
1194 ASSERT(L2LOGPSIZE >= sb->s_blocksize_bits); 1196 ASSERT(L2LOGPSIZE >= sb->s_blocksize_bits);
1195 1197
1196 /* 1198 /*
1197 * initialize log. 1199 * initialize log.
1198 */ 1200 */
1199 if ((rc = lmLogInit(log))) { 1201 if ((rc = lmLogInit(log))) {
1200 kfree(log); 1202 kfree(log);
1201 jfs_warn("lmLogOpen: exit(%d)", rc); 1203 jfs_warn("lmLogOpen: exit(%d)", rc);
1202 return rc; 1204 return rc;
1203 } 1205 }
1204 1206
1205 list_add(&JFS_SBI(sb)->log_list, &log->sb_list); 1207 list_add(&JFS_SBI(sb)->log_list, &log->sb_list);
1206 JFS_SBI(sb)->log = log; 1208 JFS_SBI(sb)->log = log;
1207 1209
1208 return rc; 1210 return rc;
1209 } 1211 }
1210 1212
1211 static int open_dummy_log(struct super_block *sb) 1213 static int open_dummy_log(struct super_block *sb)
1212 { 1214 {
1213 int rc; 1215 int rc;
1214 1216
1215 down(&jfs_log_sem); 1217 down(&jfs_log_sem);
1216 if (!dummy_log) { 1218 if (!dummy_log) {
1217 dummy_log = kmalloc(sizeof(struct jfs_log), GFP_KERNEL); 1219 dummy_log = kmalloc(sizeof(struct jfs_log), GFP_KERNEL);
1218 if (!dummy_log) { 1220 if (!dummy_log) {
1219 up(&jfs_log_sem); 1221 up(&jfs_log_sem);
1220 return -ENOMEM; 1222 return -ENOMEM;
1221 } 1223 }
1222 memset(dummy_log, 0, sizeof(struct jfs_log)); 1224 memset(dummy_log, 0, sizeof(struct jfs_log));
1223 INIT_LIST_HEAD(&dummy_log->sb_list); 1225 INIT_LIST_HEAD(&dummy_log->sb_list);
1224 init_waitqueue_head(&dummy_log->syncwait); 1226 init_waitqueue_head(&dummy_log->syncwait);
1225 dummy_log->no_integrity = 1; 1227 dummy_log->no_integrity = 1;
1226 /* Make up some stuff */ 1228 /* Make up some stuff */
1227 dummy_log->base = 0; 1229 dummy_log->base = 0;
1228 dummy_log->size = 1024; 1230 dummy_log->size = 1024;
1229 rc = lmLogInit(dummy_log); 1231 rc = lmLogInit(dummy_log);
1230 if (rc) { 1232 if (rc) {
1231 kfree(dummy_log); 1233 kfree(dummy_log);
1232 dummy_log = NULL; 1234 dummy_log = NULL;
1233 up(&jfs_log_sem); 1235 up(&jfs_log_sem);
1234 return rc; 1236 return rc;
1235 } 1237 }
1236 } 1238 }
1237 1239
1238 LOG_LOCK(dummy_log); 1240 LOG_LOCK(dummy_log);
1239 list_add(&JFS_SBI(sb)->log_list, &dummy_log->sb_list); 1241 list_add(&JFS_SBI(sb)->log_list, &dummy_log->sb_list);
1240 JFS_SBI(sb)->log = dummy_log; 1242 JFS_SBI(sb)->log = dummy_log;
1241 LOG_UNLOCK(dummy_log); 1243 LOG_UNLOCK(dummy_log);
1242 up(&jfs_log_sem); 1244 up(&jfs_log_sem);
1243 1245
1244 return 0; 1246 return 0;
1245 } 1247 }
1246 1248
1247 /* 1249 /*
1248 * NAME: lmLogInit() 1250 * NAME: lmLogInit()
1249 * 1251 *
1250 * FUNCTION: log initialization at first log open. 1252 * FUNCTION: log initialization at first log open.
1251 * 1253 *
1252 * logredo() (or logformat()) should have been run previously. 1254 * logredo() (or logformat()) should have been run previously.
1253 * initialize the log from log superblock. 1255 * initialize the log from log superblock.
1254 * set the log state in the superblock to LOGMOUNT and 1256 * set the log state in the superblock to LOGMOUNT and
1255 * write SYNCPT log record. 1257 * write SYNCPT log record.
1256 * 1258 *
1257 * PARAMETER: log - log structure 1259 * PARAMETER: log - log structure
1258 * 1260 *
1259 * RETURN: 0 - if ok 1261 * RETURN: 0 - if ok
1260 * -EINVAL - bad log magic number or superblock dirty 1262 * -EINVAL - bad log magic number or superblock dirty
1261 * error returned from logwait() 1263 * error returned from logwait()
1262 * 1264 *
1263 * serialization: single first open thread 1265 * serialization: single first open thread
1264 */ 1266 */
1265 int lmLogInit(struct jfs_log * log) 1267 int lmLogInit(struct jfs_log * log)
1266 { 1268 {
1267 int rc = 0; 1269 int rc = 0;
1268 struct lrd lrd; 1270 struct lrd lrd;
1269 struct logsuper *logsuper; 1271 struct logsuper *logsuper;
1270 struct lbuf *bpsuper; 1272 struct lbuf *bpsuper;
1271 struct lbuf *bp; 1273 struct lbuf *bp;
1272 struct logpage *lp; 1274 struct logpage *lp;
1273 int lsn = 0; 1275 int lsn = 0;
1274 1276
1275 jfs_info("lmLogInit: log:0x%p", log); 1277 jfs_info("lmLogInit: log:0x%p", log);
1276 1278
1277 /* initialize the group commit serialization lock */ 1279 /* initialize the group commit serialization lock */
1278 LOGGC_LOCK_INIT(log); 1280 LOGGC_LOCK_INIT(log);
1279 1281
1280 /* allocate/initialize the log write serialization lock */ 1282 /* allocate/initialize the log write serialization lock */
1281 LOG_LOCK_INIT(log); 1283 LOG_LOCK_INIT(log);
1282 1284
1283 LOGSYNC_LOCK_INIT(log); 1285 LOGSYNC_LOCK_INIT(log);
1284 1286
1285 INIT_LIST_HEAD(&log->synclist); 1287 INIT_LIST_HEAD(&log->synclist);
1286 1288
1287 INIT_LIST_HEAD(&log->cqueue); 1289 INIT_LIST_HEAD(&log->cqueue);
1288 log->flush_tblk = NULL; 1290 log->flush_tblk = NULL;
1289 1291
1290 log->count = 0; 1292 log->count = 0;
1291 1293
1292 /* 1294 /*
1293 * initialize log i/o 1295 * initialize log i/o
1294 */ 1296 */
1295 if ((rc = lbmLogInit(log))) 1297 if ((rc = lbmLogInit(log)))
1296 return rc; 1298 return rc;
1297 1299
1298 if (!test_bit(log_INLINELOG, &log->flag)) 1300 if (!test_bit(log_INLINELOG, &log->flag))
1299 log->l2bsize = L2LOGPSIZE; 1301 log->l2bsize = L2LOGPSIZE;
1300 1302
1301 /* check for disabled journaling to disk */ 1303 /* check for disabled journaling to disk */
1302 if (log->no_integrity) { 1304 if (log->no_integrity) {
1303 /* 1305 /*
1304 * Journal pages will still be filled. When the time comes 1306 * Journal pages will still be filled. When the time comes
1305 * to actually do the I/O, the write is not done, and the 1307 * to actually do the I/O, the write is not done, and the
1306 * endio routine is called directly. 1308 * endio routine is called directly.
1307 */ 1309 */
1308 bp = lbmAllocate(log , 0); 1310 bp = lbmAllocate(log , 0);
1309 log->bp = bp; 1311 log->bp = bp;
1310 bp->l_pn = bp->l_eor = 0; 1312 bp->l_pn = bp->l_eor = 0;
1311 } else { 1313 } else {
1312 /* 1314 /*
1313 * validate log superblock 1315 * validate log superblock
1314 */ 1316 */
1315 if ((rc = lbmRead(log, 1, &bpsuper))) 1317 if ((rc = lbmRead(log, 1, &bpsuper)))
1316 goto errout10; 1318 goto errout10;
1317 1319
1318 logsuper = (struct logsuper *) bpsuper->l_ldata; 1320 logsuper = (struct logsuper *) bpsuper->l_ldata;
1319 1321
1320 if (logsuper->magic != cpu_to_le32(LOGMAGIC)) { 1322 if (logsuper->magic != cpu_to_le32(LOGMAGIC)) {
1321 jfs_warn("*** Log Format Error ! ***"); 1323 jfs_warn("*** Log Format Error ! ***");
1322 rc = -EINVAL; 1324 rc = -EINVAL;
1323 goto errout20; 1325 goto errout20;
1324 } 1326 }
1325 1327
1326 /* logredo() should have been run successfully. */ 1328 /* logredo() should have been run successfully. */
1327 if (logsuper->state != cpu_to_le32(LOGREDONE)) { 1329 if (logsuper->state != cpu_to_le32(LOGREDONE)) {
1328 jfs_warn("*** Log Is Dirty ! ***"); 1330 jfs_warn("*** Log Is Dirty ! ***");
1329 rc = -EINVAL; 1331 rc = -EINVAL;
1330 goto errout20; 1332 goto errout20;
1331 } 1333 }
1332 1334
1333 /* initialize log from log superblock */ 1335 /* initialize log from log superblock */
1334 if (test_bit(log_INLINELOG,&log->flag)) { 1336 if (test_bit(log_INLINELOG,&log->flag)) {
1335 if (log->size != le32_to_cpu(logsuper->size)) { 1337 if (log->size != le32_to_cpu(logsuper->size)) {
1336 rc = -EINVAL; 1338 rc = -EINVAL;
1337 goto errout20; 1339 goto errout20;
1338 } 1340 }
1339 jfs_info("lmLogInit: inline log:0x%p base:0x%Lx " 1341 jfs_info("lmLogInit: inline log:0x%p base:0x%Lx "
1340 "size:0x%x", log, 1342 "size:0x%x", log,
1341 (unsigned long long) log->base, log->size); 1343 (unsigned long long) log->base, log->size);
1342 } else { 1344 } else {
1343 if (memcmp(logsuper->uuid, log->uuid, 16)) { 1345 if (memcmp(logsuper->uuid, log->uuid, 16)) {
1344 jfs_warn("wrong uuid on JFS log device"); 1346 jfs_warn("wrong uuid on JFS log device");
1345 goto errout20; 1347 goto errout20;
1346 } 1348 }
1347 log->size = le32_to_cpu(logsuper->size); 1349 log->size = le32_to_cpu(logsuper->size);
1348 log->l2bsize = le32_to_cpu(logsuper->l2bsize); 1350 log->l2bsize = le32_to_cpu(logsuper->l2bsize);
1349 jfs_info("lmLogInit: external log:0x%p base:0x%Lx " 1351 jfs_info("lmLogInit: external log:0x%p base:0x%Lx "
1350 "size:0x%x", log, 1352 "size:0x%x", log,
1351 (unsigned long long) log->base, log->size); 1353 (unsigned long long) log->base, log->size);
1352 } 1354 }
1353 1355
1354 log->page = le32_to_cpu(logsuper->end) / LOGPSIZE; 1356 log->page = le32_to_cpu(logsuper->end) / LOGPSIZE;
1355 log->eor = le32_to_cpu(logsuper->end) - (LOGPSIZE * log->page); 1357 log->eor = le32_to_cpu(logsuper->end) - (LOGPSIZE * log->page);
1356 1358
1357 /* 1359 /*
1358 * initialize for log append write mode 1360 * initialize for log append write mode
1359 */ 1361 */
1360 /* establish current/end-of-log page/buffer */ 1362 /* establish current/end-of-log page/buffer */
1361 if ((rc = lbmRead(log, log->page, &bp))) 1363 if ((rc = lbmRead(log, log->page, &bp)))
1362 goto errout20; 1364 goto errout20;
1363 1365
1364 lp = (struct logpage *) bp->l_ldata; 1366 lp = (struct logpage *) bp->l_ldata;
1365 1367
1366 jfs_info("lmLogInit: lsn:0x%x page:%d eor:%d:%d", 1368 jfs_info("lmLogInit: lsn:0x%x page:%d eor:%d:%d",
1367 le32_to_cpu(logsuper->end), log->page, log->eor, 1369 le32_to_cpu(logsuper->end), log->page, log->eor,
1368 le16_to_cpu(lp->h.eor)); 1370 le16_to_cpu(lp->h.eor));
1369 1371
1370 log->bp = bp; 1372 log->bp = bp;
1371 bp->l_pn = log->page; 1373 bp->l_pn = log->page;
1372 bp->l_eor = log->eor; 1374 bp->l_eor = log->eor;
1373 1375
1374 /* if current page is full, move on to next page */ 1376 /* if current page is full, move on to next page */
1375 if (log->eor >= LOGPSIZE - LOGPTLRSIZE) 1377 if (log->eor >= LOGPSIZE - LOGPTLRSIZE)
1376 lmNextPage(log); 1378 lmNextPage(log);
1377 1379
1378 /* 1380 /*
1379 * initialize log syncpoint 1381 * initialize log syncpoint
1380 */ 1382 */
1381 /* 1383 /*
1382 * write the first SYNCPT record with syncpoint = 0 1384 * write the first SYNCPT record with syncpoint = 0
1383 * (i.e., log redo up to HERE !); 1385 * (i.e., log redo up to HERE !);
1384 * remove current page from lbm write queue at end of pageout 1386 * remove current page from lbm write queue at end of pageout
1385 * (to write log superblock update), but do not release to 1387 * (to write log superblock update), but do not release to
1386 * freelist; 1388 * freelist;
1387 */ 1389 */
1388 lrd.logtid = 0; 1390 lrd.logtid = 0;
1389 lrd.backchain = 0; 1391 lrd.backchain = 0;
1390 lrd.type = cpu_to_le16(LOG_SYNCPT); 1392 lrd.type = cpu_to_le16(LOG_SYNCPT);
1391 lrd.length = 0; 1393 lrd.length = 0;
1392 lrd.log.syncpt.sync = 0; 1394 lrd.log.syncpt.sync = 0;
1393 lsn = lmWriteRecord(log, NULL, &lrd, NULL); 1395 lsn = lmWriteRecord(log, NULL, &lrd, NULL);
1394 bp = log->bp; 1396 bp = log->bp;
1395 bp->l_ceor = bp->l_eor; 1397 bp->l_ceor = bp->l_eor;
1396 lp = (struct logpage *) bp->l_ldata; 1398 lp = (struct logpage *) bp->l_ldata;
1397 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor); 1399 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
1398 lbmWrite(log, bp, lbmWRITE | lbmSYNC, 0); 1400 lbmWrite(log, bp, lbmWRITE | lbmSYNC, 0);
1399 if ((rc = lbmIOWait(bp, 0))) 1401 if ((rc = lbmIOWait(bp, 0)))
1400 goto errout30; 1402 goto errout30;
1401 1403
1402 /* 1404 /*
1403 * update/write superblock 1405 * update/write superblock
1404 */ 1406 */
1405 logsuper->state = cpu_to_le32(LOGMOUNT); 1407 logsuper->state = cpu_to_le32(LOGMOUNT);
1406 log->serial = le32_to_cpu(logsuper->serial) + 1; 1408 log->serial = le32_to_cpu(logsuper->serial) + 1;
1407 logsuper->serial = cpu_to_le32(log->serial); 1409 logsuper->serial = cpu_to_le32(log->serial);
1408 lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC); 1410 lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1409 if ((rc = lbmIOWait(bpsuper, lbmFREE))) 1411 if ((rc = lbmIOWait(bpsuper, lbmFREE)))
1410 goto errout30; 1412 goto errout30;
1411 } 1413 }
1412 1414
1413 /* initialize logsync parameters */ 1415 /* initialize logsync parameters */
1414 log->logsize = (log->size - 2) << L2LOGPSIZE; 1416 log->logsize = (log->size - 2) << L2LOGPSIZE;
1415 log->lsn = lsn; 1417 log->lsn = lsn;
1416 log->syncpt = lsn; 1418 log->syncpt = lsn;
1417 log->sync = log->syncpt; 1419 log->sync = log->syncpt;
1418 log->nextsync = LOGSYNC_DELTA(log->logsize); 1420 log->nextsync = LOGSYNC_DELTA(log->logsize);
1419 1421
1420 jfs_info("lmLogInit: lsn:0x%x syncpt:0x%x sync:0x%x", 1422 jfs_info("lmLogInit: lsn:0x%x syncpt:0x%x sync:0x%x",
1421 log->lsn, log->syncpt, log->sync); 1423 log->lsn, log->syncpt, log->sync);
1422 1424
1423 /* 1425 /*
1424 * initialize for lazy/group commit 1426 * initialize for lazy/group commit
1425 */ 1427 */
1426 log->clsn = lsn; 1428 log->clsn = lsn;
1427 1429
1428 return 0; 1430 return 0;
1429 1431
1430 /* 1432 /*
1431 * unwind on error 1433 * unwind on error
1432 */ 1434 */
1433 errout30: /* release log page */ 1435 errout30: /* release log page */
1434 log->wqueue = NULL; 1436 log->wqueue = NULL;
1435 bp->l_wqnext = NULL; 1437 bp->l_wqnext = NULL;
1436 lbmFree(bp); 1438 lbmFree(bp);
1437 1439
1438 errout20: /* release log superblock */ 1440 errout20: /* release log superblock */
1439 lbmFree(bpsuper); 1441 lbmFree(bpsuper);
1440 1442
1441 errout10: /* unwind lbmLogInit() */ 1443 errout10: /* unwind lbmLogInit() */
1442 lbmLogShutdown(log); 1444 lbmLogShutdown(log);
1443 1445
1444 jfs_warn("lmLogInit: exit(%d)", rc); 1446 jfs_warn("lmLogInit: exit(%d)", rc);
1445 return rc; 1447 return rc;
1446 } 1448 }
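
The logsize initialization near the end of lmLogInit() excludes two log pages from the usable size and then converts pages to bytes; the excluded pages cover the reserved area at the start of the log, including the log superblock that lbmRead() fetches as page 1 above. A worked example, assuming 4KB log pages (L2LOGPSIZE = 12):

/* Worked example of log->logsize = (log->size - 2) << L2LOGPSIZE */
#include <stdio.h>

#define L2LOGPSIZE 12	/* 4KB log pages */

int main(void)
{
	int size_in_pages = 1024;	/* e.g. the dummy_log above */
	int logsize = (size_in_pages - 2) << L2LOGPSIZE;

	printf("usable log: %d bytes\n", logsize);	/* 4186112 */
	return 0;
}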
1447 1449
1448 1450
1449 /* 1451 /*
1450 * NAME: lmLogClose() 1452 * NAME: lmLogClose()
1451 * 1453 *
1452 * FUNCTION: remove file system <ipmnt> from active list of log <iplog> 1454 * FUNCTION: remove file system <ipmnt> from active list of log <iplog>
1453 * and close it on last close. 1455 * and close it on last close.
1454 * 1456 *
1455 * PARAMETER: sb - superblock 1457 * PARAMETER: sb - superblock
1456 * 1458 *
1457 * RETURN: errors from subroutines 1459 * RETURN: errors from subroutines
1458 * 1460 *
1459 * serialization: 1461 * serialization:
1460 */ 1462 */
1461 int lmLogClose(struct super_block *sb) 1463 int lmLogClose(struct super_block *sb)
1462 { 1464 {
1463 struct jfs_sb_info *sbi = JFS_SBI(sb); 1465 struct jfs_sb_info *sbi = JFS_SBI(sb);
1464 struct jfs_log *log = sbi->log; 1466 struct jfs_log *log = sbi->log;
1465 struct block_device *bdev; 1467 struct block_device *bdev;
1466 int rc = 0; 1468 int rc = 0;
1467 1469
1468 jfs_info("lmLogClose: log:0x%p", log); 1470 jfs_info("lmLogClose: log:0x%p", log);
1469 1471
1470 down(&jfs_log_sem); 1472 down(&jfs_log_sem);
1471 LOG_LOCK(log); 1473 LOG_LOCK(log);
1472 list_del(&sbi->log_list); 1474 list_del(&sbi->log_list);
1473 LOG_UNLOCK(log); 1475 LOG_UNLOCK(log);
1474 sbi->log = NULL; 1476 sbi->log = NULL;
1475 1477
1476 /* 1478 /*
1477 * We need to make sure all of the "written" metapages 1479 * We need to make sure all of the "written" metapages
1478 * actually make it to disk 1480 * actually make it to disk
1479 */ 1481 */
1480 sync_blockdev(sb->s_bdev); 1482 sync_blockdev(sb->s_bdev);
1481 1483
1482 if (test_bit(log_INLINELOG, &log->flag)) { 1484 if (test_bit(log_INLINELOG, &log->flag)) {
1483 /* 1485 /*
1484 * in-line log in host file system 1486 * in-line log in host file system
1485 */ 1487 */
1486 rc = lmLogShutdown(log); 1488 rc = lmLogShutdown(log);
1487 kfree(log); 1489 kfree(log);
1488 goto out; 1490 goto out;
1489 } 1491 }
1490 1492
1491 if (!log->no_integrity) 1493 if (!log->no_integrity)
1492 lmLogFileSystem(log, sbi, 0); 1494 lmLogFileSystem(log, sbi, 0);
1493 1495
1494 if (!list_empty(&log->sb_list)) 1496 if (!list_empty(&log->sb_list))
1495 goto out; 1497 goto out;
1496 1498
1497 /* 1499 /*
1498 * TODO: ensure that the dummy_log is in a state to allow 1500 * TODO: ensure that the dummy_log is in a state to allow
1499 * lbmLogShutdown to deallocate all the buffers and call 1501 * lbmLogShutdown to deallocate all the buffers and call
1500 * kfree against dummy_log. For now, leave dummy_log & its 1502 * kfree against dummy_log. For now, leave dummy_log & its
1501 * buffers in memory, and reuse if another no-integrity mount 1503 * buffers in memory, and reuse if another no-integrity mount
1502 * is requested. 1504 * is requested.
1503 */ 1505 */
1504 if (log->no_integrity) 1506 if (log->no_integrity)
1505 goto out; 1507 goto out;
1506 1508
1507 /* 1509 /*
1508 * external log as separate logical volume 1510 * external log as separate logical volume
1509 */ 1511 */
1510 list_del(&log->journal_list); 1512 list_del(&log->journal_list);
1511 bdev = log->bdev; 1513 bdev = log->bdev;
1512 rc = lmLogShutdown(log); 1514 rc = lmLogShutdown(log);
1513 1515
1514 bd_release(bdev); 1516 bd_release(bdev);
1515 blkdev_put(bdev); 1517 blkdev_put(bdev);
1516 1518
1517 kfree(log); 1519 kfree(log);
1518 1520
1519 out: 1521 out:
1520 up(&jfs_log_sem); 1522 up(&jfs_log_sem);
1521 jfs_info("lmLogClose: exit(%d)", rc); 1523 jfs_info("lmLogClose: exit(%d)", rc);
1522 return rc; 1524 return rc;
1523 } 1525 }
1524 1526
1525 1527
1526 /* 1528 /*
1527 * NAME: jfs_flush_journal() 1529 * NAME: jfs_flush_journal()
1528 * 1530 *
1529 * FUNCTION: initiate write of any outstanding transactions to the journal 1531 * FUNCTION: initiate write of any outstanding transactions to the journal
1530 * and optionally wait until they are all written to disk 1532 * and optionally wait until they are all written to disk
1531 * 1533 *
1532 * wait == 0 flush until latest txn is committed, don't wait 1534 * wait == 0 flush until latest txn is committed, don't wait
1533 * wait == 1 flush until latest txn is committed, wait 1535 * wait == 1 flush until latest txn is committed, wait
1534 * wait > 1 flush until all txn's are complete, wait 1536 * wait > 1 flush until all txn's are complete, wait
1535 */ 1537 */
1536 void jfs_flush_journal(struct jfs_log *log, int wait) 1538 void jfs_flush_journal(struct jfs_log *log, int wait)
1537 { 1539 {
1538 int i; 1540 int i;
1539 struct tblock *target = NULL; 1541 struct tblock *target = NULL;
1540 struct jfs_sb_info *sbi; 1542 struct jfs_sb_info *sbi;
1541 1543
1542 /* jfs_write_inode may call us during read-only mount */ 1544 /* jfs_write_inode may call us during read-only mount */
1543 if (!log) 1545 if (!log)
1544 return; 1546 return;
1545 1547
1546 jfs_info("jfs_flush_journal: log:0x%p wait=%d", log, wait); 1548 jfs_info("jfs_flush_journal: log:0x%p wait=%d", log, wait);
1547 1549
1548 LOGGC_LOCK(log); 1550 LOGGC_LOCK(log);
1549 1551
1550 if (!list_empty(&log->cqueue)) { 1552 if (!list_empty(&log->cqueue)) {
1551 /* 1553 /*
1552 * This ensures that we will keep writing to the journal as long 1554 * This ensures that we will keep writing to the journal as long
1553 * as there are unwritten commit records 1555 * as there are unwritten commit records
1554 */ 1556 */
1555 target = list_entry(log->cqueue.prev, struct tblock, cqueue); 1557 target = list_entry(log->cqueue.prev, struct tblock, cqueue);
1556 1558
1557 if (test_bit(log_FLUSH, &log->flag)) { 1559 if (test_bit(log_FLUSH, &log->flag)) {
1558 /* 1560 /*
1559 * We're already flushing. 1561 * We're already flushing.
1560 * if flush_tblk is NULL, we are flushing everything, 1562 * if flush_tblk is NULL, we are flushing everything,
1561 * so leave it that way. Otherwise, update it to the 1563 * so leave it that way. Otherwise, update it to the
1562 * latest transaction 1564 * latest transaction
1563 */ 1565 */
1564 if (log->flush_tblk) 1566 if (log->flush_tblk)
1565 log->flush_tblk = target; 1567 log->flush_tblk = target;
1566 } else { 1568 } else {
1567 /* Only flush until latest transaction is committed */ 1569 /* Only flush until latest transaction is committed */
1568 log->flush_tblk = target; 1570 log->flush_tblk = target;
1569 set_bit(log_FLUSH, &log->flag); 1571 set_bit(log_FLUSH, &log->flag);
1570 1572
1571 /* 1573 /*
1572 * Initiate I/O on outstanding transactions 1574 * Initiate I/O on outstanding transactions
1573 */ 1575 */
1574 if (!(log->cflag & logGC_PAGEOUT)) { 1576 if (!(log->cflag & logGC_PAGEOUT)) {
1575 log->cflag |= logGC_PAGEOUT; 1577 log->cflag |= logGC_PAGEOUT;
1576 lmGCwrite(log, 0); 1578 lmGCwrite(log, 0);
1577 } 1579 }
1578 } 1580 }
1579 } 1581 }
1580 if ((wait > 1) || test_bit(log_SYNCBARRIER, &log->flag)) { 1582 if ((wait > 1) || test_bit(log_SYNCBARRIER, &log->flag)) {
1581 /* Flush until all activity complete */ 1583 /* Flush until all activity complete */
1582 set_bit(log_FLUSH, &log->flag); 1584 set_bit(log_FLUSH, &log->flag);
1583 log->flush_tblk = NULL; 1585 log->flush_tblk = NULL;
1584 } 1586 }
1585 1587
1586 if (wait && target && !(target->flag & tblkGC_COMMITTED)) { 1588 if (wait && target && !(target->flag & tblkGC_COMMITTED)) {
1587 DECLARE_WAITQUEUE(__wait, current); 1589 DECLARE_WAITQUEUE(__wait, current);
1588 1590
1589 add_wait_queue(&target->gcwait, &__wait); 1591 add_wait_queue(&target->gcwait, &__wait);
1590 set_current_state(TASK_UNINTERRUPTIBLE); 1592 set_current_state(TASK_UNINTERRUPTIBLE);
1591 LOGGC_UNLOCK(log); 1593 LOGGC_UNLOCK(log);
1592 schedule(); 1594 schedule();
1593 current->state = TASK_RUNNING; 1595 current->state = TASK_RUNNING;
1594 LOGGC_LOCK(log); 1596 LOGGC_LOCK(log);
1595 remove_wait_queue(&target->gcwait, &__wait); 1597 remove_wait_queue(&target->gcwait, &__wait);
1596 } 1598 }
1597 LOGGC_UNLOCK(log); 1599 LOGGC_UNLOCK(log);
1598 1600
1599 if (wait < 2) 1601 if (wait < 2)
1600 return; 1602 return;
1601 1603
1602 list_for_each_entry(sbi, &log->sb_list, log_list) { 1604 list_for_each_entry(sbi, &log->sb_list, log_list) {
1603 filemap_fdatawrite(sbi->ipbmap->i_mapping); 1605 filemap_fdatawrite(sbi->ipbmap->i_mapping);
1604 filemap_fdatawrite(sbi->ipimap->i_mapping); 1606 filemap_fdatawrite(sbi->ipimap->i_mapping);
1605 filemap_fdatawrite(sbi->direct_inode->i_mapping); 1607 filemap_fdatawrite(sbi->direct_inode->i_mapping);
1606 } 1608 }
1607 1609
1608 /* 1610 /*
1609 * If there was recent activity, we may need to wait 1611 * If there was recent activity, we may need to wait
1610 * for the lazycommit thread to catch up 1612 * for the lazycommit thread to catch up
1611 */ 1613 */
1612 if ((!list_empty(&log->cqueue)) || !list_empty(&log->synclist)) { 1614 if ((!list_empty(&log->cqueue)) || !list_empty(&log->synclist)) {
1613 for (i = 0; i < 200; i++) { /* Too much? */ 1615 for (i = 0; i < 200; i++) { /* Too much? */
1614 msleep(250); 1616 msleep(250);
1615 if (list_empty(&log->cqueue) && 1617 if (list_empty(&log->cqueue) &&
1616 list_empty(&log->synclist)) 1618 list_empty(&log->synclist))
1617 break; 1619 break;
1618 } 1620 }
1619 } 1621 }
1620 assert(list_empty(&log->cqueue)); 1622 assert(list_empty(&log->cqueue));
1621 1623
1622 #ifdef CONFIG_JFS_DEBUG 1624 #ifdef CONFIG_JFS_DEBUG
1623 if (!list_empty(&log->synclist)) { 1625 if (!list_empty(&log->synclist)) {
1624 struct logsyncblk *lp; 1626 struct logsyncblk *lp;
1625 1627
1626 list_for_each_entry(lp, &log->synclist, synclist) { 1628 list_for_each_entry(lp, &log->synclist, synclist) {
1627 if (lp->xflag & COMMIT_PAGE) { 1629 if (lp->xflag & COMMIT_PAGE) {
1628 struct metapage *mp = (struct metapage *)lp; 1630 struct metapage *mp = (struct metapage *)lp;
1629 dump_mem("orphan metapage", lp, 1631 dump_mem("orphan metapage", lp,
1630 sizeof(struct metapage)); 1632 sizeof(struct metapage));
1631 dump_mem("page", mp->page, sizeof(struct page)); 1633 dump_mem("page", mp->page, sizeof(struct page));
1632 } 1634 }
1633 else 1635 else
1634 dump_mem("orphan tblock", lp, 1636 dump_mem("orphan tblock", lp,
1635 sizeof(struct tblock)); 1637 sizeof(struct tblock));
1636 } 1638 }
1637 } 1639 }
1638 #endif 1640 #endif
1639 //assert(list_empty(&log->synclist)); 1641 //assert(list_empty(&log->synclist));
1640 clear_bit(log_FLUSH, &log->flag); 1642 clear_bit(log_FLUSH, &log->flag);
1641 } 1643 }
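
The catch-up loop in jfs_flush_journal() polls the commit queue and sync list for up to 200 iterations of msleep(250), roughly 50 seconds, before the assert. The same bounded-poll idiom in a runnable user-space form, with a stand-in emptiness test:

/* Bounded polling sketch; queues_empty() fakes the cqueue/synclist test. */
#include <stdbool.h>
#include <unistd.h>

static int pending = 3;
static bool queues_empty(void) { return pending-- <= 0; }

static bool wait_for_drain(void)
{
	for (int i = 0; i < 200; i++) {	/* bound: 200 * 250ms = ~50s */
		usleep(250 * 1000);	/* cf. msleep(250) */
		if (queues_empty())
			return true;
	}
	return false;
}

int main(void) { return wait_for_drain() ? 0 : 1; }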
1642 1644
1643 /* 1645 /*
1644 * NAME: lmLogShutdown() 1646 * NAME: lmLogShutdown()
1645 * 1647 *
1646 * FUNCTION: log shutdown at last LogClose(). 1648 * FUNCTION: log shutdown at last LogClose().
1647 * 1649 *
1648 * write log syncpt record. 1650 * write log syncpt record.
1649 * update super block to set redone flag to 0. 1651 * update super block to set redone flag to 0.
1650 * 1652 *
1651 * PARAMETER: log - log inode 1653 * PARAMETER: log - log inode
1652 * 1654 *
1653 * RETURN: 0 - success 1655 * RETURN: 0 - success
1654 * 1656 *
1655 * serialization: single last close thread 1657 * serialization: single last close thread
1656 */ 1658 */
1657 int lmLogShutdown(struct jfs_log * log) 1659 int lmLogShutdown(struct jfs_log * log)
1658 { 1660 {
1659 int rc; 1661 int rc;
1660 struct lrd lrd; 1662 struct lrd lrd;
1661 int lsn; 1663 int lsn;
1662 struct logsuper *logsuper; 1664 struct logsuper *logsuper;
1663 struct lbuf *bpsuper; 1665 struct lbuf *bpsuper;
1664 struct lbuf *bp; 1666 struct lbuf *bp;
1665 struct logpage *lp; 1667 struct logpage *lp;
1666 1668
1667 jfs_info("lmLogShutdown: log:0x%p", log); 1669 jfs_info("lmLogShutdown: log:0x%p", log);
1668 1670
1669 jfs_flush_journal(log, 2); 1671 jfs_flush_journal(log, 2);
1670 1672
1671 /* 1673 /*
1672 * write the last SYNCPT record with syncpoint = 0 1674 * write the last SYNCPT record with syncpoint = 0
1673 * (i.e., log redo up to HERE !) 1675 * (i.e., log redo up to HERE !)
1674 */ 1676 */
1675 lrd.logtid = 0; 1677 lrd.logtid = 0;
1676 lrd.backchain = 0; 1678 lrd.backchain = 0;
1677 lrd.type = cpu_to_le16(LOG_SYNCPT); 1679 lrd.type = cpu_to_le16(LOG_SYNCPT);
1678 lrd.length = 0; 1680 lrd.length = 0;
1679 lrd.log.syncpt.sync = 0; 1681 lrd.log.syncpt.sync = 0;
1680 1682
1681 lsn = lmWriteRecord(log, NULL, &lrd, NULL); 1683 lsn = lmWriteRecord(log, NULL, &lrd, NULL);
1682 bp = log->bp; 1684 bp = log->bp;
1683 lp = (struct logpage *) bp->l_ldata; 1685 lp = (struct logpage *) bp->l_ldata;
1684 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor); 1686 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
1685 lbmWrite(log, log->bp, lbmWRITE | lbmRELEASE | lbmSYNC, 0); 1687 lbmWrite(log, log->bp, lbmWRITE | lbmRELEASE | lbmSYNC, 0);
1686 lbmIOWait(log->bp, lbmFREE); 1688 lbmIOWait(log->bp, lbmFREE);
1687 log->bp = NULL; 1689 log->bp = NULL;
1688 1690
1689 /* 1691 /*
1690 * synchronous update log superblock 1692 * synchronous update log superblock
1691 * mark log state as shutdown cleanly 1693 * mark log state as shutdown cleanly
1692 * (i.e., Log does not need to be replayed). 1694 * (i.e., Log does not need to be replayed).
1693 */ 1695 */
1694 if ((rc = lbmRead(log, 1, &bpsuper))) 1696 if ((rc = lbmRead(log, 1, &bpsuper)))
1695 goto out; 1697 goto out;
1696 1698
1697 logsuper = (struct logsuper *) bpsuper->l_ldata; 1699 logsuper = (struct logsuper *) bpsuper->l_ldata;
1698 logsuper->state = cpu_to_le32(LOGREDONE); 1700 logsuper->state = cpu_to_le32(LOGREDONE);
1699 logsuper->end = cpu_to_le32(lsn); 1701 logsuper->end = cpu_to_le32(lsn);
1700 lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC); 1702 lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1701 rc = lbmIOWait(bpsuper, lbmFREE); 1703 rc = lbmIOWait(bpsuper, lbmFREE);
1702 1704
1703 jfs_info("lmLogShutdown: lsn:0x%x page:%d eor:%d", 1705 jfs_info("lmLogShutdown: lsn:0x%x page:%d eor:%d",
1704 lsn, log->page, log->eor); 1706 lsn, log->page, log->eor);
1705 1707
1706 out: 1708 out:
1707 /* 1709 /*
1708 * shutdown per log i/o 1710 * shutdown per log i/o
1709 */ 1711 */
1710 lbmLogShutdown(log); 1712 lbmLogShutdown(log);
1711 1713
1712 if (rc) { 1714 if (rc) {
1713 jfs_warn("lmLogShutdown: exit(%d)", rc); 1715 jfs_warn("lmLogShutdown: exit(%d)", rc);
1714 } 1716 }
1715 return rc; 1717 return rc;
1716 } 1718 }
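
Taken together, lmLogInit() and lmLogShutdown() maintain a small on-disk state machine: a clean shutdown writes LOGREDONE, mounting writes LOGMOUNT, and lmLogInit() refuses anything that is not LOGREDONE ("Log Is Dirty"). A sketch of that check, using stand-in constants rather than the real on-disk values:

/* Illustrative only: the numeric values of these states are made up. */
enum log_state { LOGREDONE = 1, LOGMOUNT = 2, LOGWRAP = 3 };

static int log_open_check(enum log_state on_disk)
{
	if (on_disk != LOGREDONE)
		return -1;	/* cf. -EINVAL: dirty log, logredo must run */
	return 0;		/* caller proceeds and stamps LOGMOUNT */
}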
1717 1719
1718 1720
1719 /* 1721 /*
1720 * NAME: lmLogFileSystem() 1722 * NAME: lmLogFileSystem()
1721 * 1723 *
1722 * FUNCTION: insert (<activate> = true)/remove (<activate> = false) 1724 * FUNCTION: insert (<activate> = true)/remove (<activate> = false)
1723 * file system into/from log active file system list. 1725 * file system into/from log active file system list.
1724 * 1726 *
1725 * PARAMETER: log - pointer to log's inode. 1727 * PARAMETER: log - pointer to log's inode.
1726 * sbi - superblock info of the file system 1728 * sbi - superblock info of the file system
1727 * (supplies the uuid recorded in the log). 1729 * (supplies the uuid recorded in the log).
1728 * activate - insert/remove device from active list. 1730 * activate - insert/remove device from active list.
1729 * 1731 *
1730 * RETURN: 0 - success 1732 * RETURN: 0 - success
1731 * errors returned by vms_iowait(). 1733 * errors returned by vms_iowait().
1732 */ 1734 */
1733 static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi, 1735 static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
1734 int activate) 1736 int activate)
1735 { 1737 {
1736 int rc = 0; 1738 int rc = 0;
1737 int i; 1739 int i;
1738 struct logsuper *logsuper; 1740 struct logsuper *logsuper;
1739 struct lbuf *bpsuper; 1741 struct lbuf *bpsuper;
1740 char *uuid = sbi->uuid; 1742 char *uuid = sbi->uuid;
1741 1743
1742 /* 1744 /*
1743 * insert/remove file system device to log active file system list. 1745 * insert/remove file system device to log active file system list.
1744 */ 1746 */
1745 if ((rc = lbmRead(log, 1, &bpsuper))) 1747 if ((rc = lbmRead(log, 1, &bpsuper)))
1746 return rc; 1748 return rc;
1747 1749
1748 logsuper = (struct logsuper *) bpsuper->l_ldata; 1750 logsuper = (struct logsuper *) bpsuper->l_ldata;
1749 if (activate) { 1751 if (activate) {
1750 for (i = 0; i < MAX_ACTIVE; i++) 1752 for (i = 0; i < MAX_ACTIVE; i++)
1751 if (!memcmp(logsuper->active[i].uuid, NULL_UUID, 16)) { 1753 if (!memcmp(logsuper->active[i].uuid, NULL_UUID, 16)) {
1752 memcpy(logsuper->active[i].uuid, uuid, 16); 1754 memcpy(logsuper->active[i].uuid, uuid, 16);
1753 sbi->aggregate = i; 1755 sbi->aggregate = i;
1754 break; 1756 break;
1755 } 1757 }
1756 if (i == MAX_ACTIVE) { 1758 if (i == MAX_ACTIVE) {
1757 jfs_warn("Too many file systems sharing journal!"); 1759 jfs_warn("Too many file systems sharing journal!");
1758 lbmFree(bpsuper); 1760 lbmFree(bpsuper);
1759 return -EMFILE; /* Is there a better rc? */ 1761 return -EMFILE; /* Is there a better rc? */
1760 } 1762 }
1761 } else { 1763 } else {
1762 for (i = 0; i < MAX_ACTIVE; i++) 1764 for (i = 0; i < MAX_ACTIVE; i++)
1763 if (!memcmp(logsuper->active[i].uuid, uuid, 16)) { 1765 if (!memcmp(logsuper->active[i].uuid, uuid, 16)) {
1764 memcpy(logsuper->active[i].uuid, NULL_UUID, 16); 1766 memcpy(logsuper->active[i].uuid, NULL_UUID, 16);
1765 break; 1767 break;
1766 } 1768 }
1767 if (i == MAX_ACTIVE) { 1769 if (i == MAX_ACTIVE) {
1768 jfs_warn("Somebody stomped on the journal!"); 1770 jfs_warn("Somebody stomped on the journal!");
1769 lbmFree(bpsuper); 1771 lbmFree(bpsuper);
1770 return -EIO; 1772 return -EIO;
1771 } 1773 }
1772 1774
1773 } 1775 }
1774 1776
1775 /* 1777 /*
1776 * synchronous write log superblock: 1778 * synchronous write log superblock:
1777 * 1779 *
1778 * write sidestream bypassing write queue: 1780 * write sidestream bypassing write queue:
1779 * at file system mount, log super block is updated for 1781 * at file system mount, log super block is updated for
1780 * activation of the file system before any log record 1782 * activation of the file system before any log record
1781 * (MOUNT record) of the file system, and at file system 1783 * (MOUNT record) of the file system, and at file system
1782 * unmount, all meta data for the file system has been 1784 * unmount, all meta data for the file system has been
1783 * flushed before log super block is updated for deactivation 1785 * flushed before log super block is updated for deactivation
1784 * of the file system. 1786 * of the file system.
1785 */ 1787 */
1786 lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC); 1788 lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1787 rc = lbmIOWait(bpsuper, lbmFREE); 1789 rc = lbmIOWait(bpsuper, lbmFREE);
1788 1790
1789 return rc; 1791 return rc;
1790 } 1792 }
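
The active list manipulated above is a fixed-size table of 16-byte uuids in the log superblock: activation claims the first all-zero slot, deactivation zeroes the matching one, and a full table or a missing entry is an error. A self-contained model (the MAX_ACTIVE value here is illustrative, not the on-disk constant):

/* Model of the active-filesystem table scan in lmLogFileSystem(). */
#include <string.h>

#define MAX_ACTIVE 128	/* assumed size for the sketch */

static unsigned char active[MAX_ACTIVE][16];
static const unsigned char null_uuid[16];	/* all zeroes */

static int fs_activate(const unsigned char uuid[16])
{
	for (int i = 0; i < MAX_ACTIVE; i++)
		if (!memcmp(active[i], null_uuid, 16)) {
			memcpy(active[i], uuid, 16);
			return i;	/* slot index, cf. sbi->aggregate */
		}
	return -1;	/* "Too many file systems sharing journal!" */
}

static int fs_deactivate(const unsigned char uuid[16])
{
	for (int i = 0; i < MAX_ACTIVE; i++)
		if (!memcmp(active[i], uuid, 16)) {
			memset(active[i], 0, 16);
			return 0;
		}
	return -1;	/* "Somebody stomped on the journal!" */
}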
1791 1793
1792 /* 1794 /*
1793 * log buffer manager (lbm) 1795 * log buffer manager (lbm)
1794 * ------------------------ 1796 * ------------------------
1795 * 1797 *
1796 * special purpose buffer manager supporting log i/o requirements. 1798 * special purpose buffer manager supporting log i/o requirements.
1797 * 1799 *
1798 * per log write queue: 1800 * per log write queue:
1799 * log pageout occurs in serial order by fifo write queue and 1801 * log pageout occurs in serial order by fifo write queue and
1800 * restricting to a single i/o in progress at any one time. 1802 * restricting to a single i/o in progress at any one time.
1801 * a circular singly-linked list 1803 * a circular singly-linked list
1802 * (log->wqueue points to the tail, and buffers are linked via 1804 * (log->wqueue points to the tail, and buffers are linked via
1803 * the bp->l_wqnext field), and 1805 * the bp->l_wqnext field), and
1804 * maintains log page in pageout or waiting for pageout in serial pageout. 1806 * maintains log page in pageout or waiting for pageout in serial pageout.
1805 */ 1807 */
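
The write queue described above keeps a single pointer to the tail of a circular singly-linked list, so the head (the page next to be written) is always tail->next and both tail insertion and head inspection are O(1). A runnable user-space model of that structure:

/* Tail-pointer circular list, as used for log->wqueue / bp->l_wqnext. */
#include <stddef.h>

struct buf {
	struct buf *next;	/* cf. bp->l_wqnext */
	int pn;
};

static struct buf *wqueue;	/* points at the TAIL, cf. log->wqueue */

static void enqueue(struct buf *bp)
{
	if (!wqueue) {
		bp->next = bp;			/* lone element circles to itself */
	} else {
		bp->next = wqueue->next;	/* new tail -> old head */
		wqueue->next = bp;		/* old tail -> new tail */
	}
	wqueue = bp;
}

static struct buf *queue_head(void)
{
	return wqueue ? wqueue->next : NULL;	/* head is tail->next */
}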
1806 1808
1807 /* 1809 /*
1808 * lbmLogInit() 1810 * lbmLogInit()
1809 * 1811 *
1810 * initialize per log I/O setup at lmLogInit() 1812 * initialize per log I/O setup at lmLogInit()
1811 */ 1813 */
1812 static int lbmLogInit(struct jfs_log * log) 1814 static int lbmLogInit(struct jfs_log * log)
1813 { /* log inode */ 1815 { /* log inode */
1814 int i; 1816 int i;
1815 struct lbuf *lbuf; 1817 struct lbuf *lbuf;
1816 1818
1817 jfs_info("lbmLogInit: log:0x%p", log); 1819 jfs_info("lbmLogInit: log:0x%p", log);
1818 1820
1819 /* initialize current buffer cursor */ 1821 /* initialize current buffer cursor */
1820 log->bp = NULL; 1822 log->bp = NULL;
1821 1823
1822 /* initialize log device write queue */ 1824 /* initialize log device write queue */
1823 log->wqueue = NULL; 1825 log->wqueue = NULL;
1824 1826
1825 /* 1827 /*
1826 * Each log has its own buffer pages allocated to it. These are 1828 * Each log has its own buffer pages allocated to it. These are
1827 * not managed by the page cache. This ensures that a transaction 1829 * not managed by the page cache. This ensures that a transaction
1828 * writing to the log does not block trying to allocate a page from 1830 * writing to the log does not block trying to allocate a page from
1829 * the page cache (for the log). This would be bad, since page 1831 * the page cache (for the log). This would be bad, since page
1830 * allocation waits on the kswapd thread that may be committing inodes 1832 * allocation waits on the kswapd thread that may be committing inodes
1831 * which would cause log activity. Was that clear? I'm trying to 1833 * which would cause log activity. Was that clear? I'm trying to
1832 * avoid deadlock here. 1834 * avoid deadlock here.
1833 */ 1835 */
1834 init_waitqueue_head(&log->free_wait); 1836 init_waitqueue_head(&log->free_wait);
1835 1837
1836 log->lbuf_free = NULL; 1838 log->lbuf_free = NULL;
1837 1839
1838 for (i = 0; i < LOGPAGES;) { 1840 for (i = 0; i < LOGPAGES;) {
1839 char *buffer; 1841 char *buffer;
1840 uint offset; 1842 uint offset;
1841 struct page *page; 1843 struct page *page;
1842 1844
1843 buffer = (char *) get_zeroed_page(GFP_KERNEL); 1845 buffer = (char *) get_zeroed_page(GFP_KERNEL);
1844 if (buffer == NULL) 1846 if (buffer == NULL)
1845 goto error; 1847 goto error;
1846 page = virt_to_page(buffer); 1848 page = virt_to_page(buffer);
1847 for (offset = 0; offset < PAGE_SIZE; offset += LOGPSIZE) { 1849 for (offset = 0; offset < PAGE_SIZE; offset += LOGPSIZE) {
1848 lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL); 1850 lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL);
1849 if (lbuf == NULL) { 1851 if (lbuf == NULL) {
1850 if (offset == 0) 1852 if (offset == 0)
1851 free_page((unsigned long) buffer); 1853 free_page((unsigned long) buffer);
1852 goto error; 1854 goto error;
1853 } 1855 }
1854 if (offset) /* we already have one reference */ 1856 if (offset) /* we already have one reference */
1855 get_page(page); 1857 get_page(page);
1856 lbuf->l_offset = offset; 1858 lbuf->l_offset = offset;
1857 lbuf->l_ldata = buffer + offset; 1859 lbuf->l_ldata = buffer + offset;
1858 lbuf->l_page = page; 1860 lbuf->l_page = page;
1859 lbuf->l_log = log; 1861 lbuf->l_log = log;
1860 init_waitqueue_head(&lbuf->l_ioevent); 1862 init_waitqueue_head(&lbuf->l_ioevent);
1861 1863
1862 lbuf->l_freelist = log->lbuf_free; 1864 lbuf->l_freelist = log->lbuf_free;
1863 log->lbuf_free = lbuf; 1865 log->lbuf_free = lbuf;
1864 i++; 1866 i++;
1865 } 1867 }
1866 } 1868 }
1867 1869
1868 return (0); 1870 return (0);
1869 1871
1870 error: 1872 error:
1871 lbmLogShutdown(log); 1873 lbmLogShutdown(log);
1872 return -ENOMEM; 1874 return -ENOMEM;
1873 } 1875 }
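
The allocation loop above carves each freshly allocated page into LOGPSIZE-sized buffers that share the backing page (taking an extra page reference for every buffer after the first) and pushes them all onto the freelist. A simplified user-space sketch of the carving, without the refcounting:

/* Page carving into log buffers; constants and struct are stand-ins. */
#include <stdlib.h>

#define PAGE_SIZE 4096
#define LOGPSIZE 4096	/* with 4KB pages, one buffer per page */

struct lbuf_sk {
	struct lbuf_sk *freelist;
	char *data;
	unsigned offset;
};

static struct lbuf_sk *free_list;

static int carve_page(void)
{
	char *buffer = calloc(1, PAGE_SIZE);	/* cf. get_zeroed_page() */

	if (!buffer)
		return -1;
	for (unsigned off = 0; off < PAGE_SIZE; off += LOGPSIZE) {
		struct lbuf_sk *lb = malloc(sizeof(*lb));

		if (!lb)
			return -1;	/* real code also unwinds page refs */
		lb->offset = off;
		lb->data = buffer + off;
		lb->freelist = free_list;	/* push on freelist head */
		free_list = lb;
	}
	return 0;
}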
1874 1876
1875 1877
1876 /* 1878 /*
1877 * lbmLogShutdown() 1879 * lbmLogShutdown()
1878 * 1880 *
1879 * finalize per log I/O setup at lmLogShutdown() 1881 * finalize per log I/O setup at lmLogShutdown()
1880 */ 1882 */
1881 static void lbmLogShutdown(struct jfs_log * log) 1883 static void lbmLogShutdown(struct jfs_log * log)
1882 { 1884 {
1883 struct lbuf *lbuf; 1885 struct lbuf *lbuf;
1884 1886
1885 jfs_info("lbmLogShutdown: log:0x%p", log); 1887 jfs_info("lbmLogShutdown: log:0x%p", log);
1886 1888
1887 lbuf = log->lbuf_free; 1889 lbuf = log->lbuf_free;
1888 while (lbuf) { 1890 while (lbuf) {
1889 struct lbuf *next = lbuf->l_freelist; 1891 struct lbuf *next = lbuf->l_freelist;
1890 __free_page(lbuf->l_page); 1892 __free_page(lbuf->l_page);
1891 kfree(lbuf); 1893 kfree(lbuf);
1892 lbuf = next; 1894 lbuf = next;
1893 } 1895 }
1894 } 1896 }
1895 1897
1896 1898
1897 /* 1899 /*
1898 * lbmAllocate() 1900 * lbmAllocate()
1899 * 1901 *
1900 * allocate an empty log buffer 1902 * allocate an empty log buffer
1901 */ 1903 */
1902 static struct lbuf *lbmAllocate(struct jfs_log * log, int pn) 1904 static struct lbuf *lbmAllocate(struct jfs_log * log, int pn)
1903 { 1905 {
1904 struct lbuf *bp; 1906 struct lbuf *bp;
1905 unsigned long flags; 1907 unsigned long flags;
1906 1908
1907 /* 1909 /*
1908 * recycle from log buffer freelist if any 1910 * recycle from log buffer freelist if any
1909 */ 1911 */
1910 LCACHE_LOCK(flags); 1912 LCACHE_LOCK(flags);
1911 LCACHE_SLEEP_COND(log->free_wait, (bp = log->lbuf_free), flags); 1913 LCACHE_SLEEP_COND(log->free_wait, (bp = log->lbuf_free), flags);
1912 log->lbuf_free = bp->l_freelist; 1914 log->lbuf_free = bp->l_freelist;
1913 LCACHE_UNLOCK(flags); 1915 LCACHE_UNLOCK(flags);
1914 1916
1915 bp->l_flag = 0; 1917 bp->l_flag = 0;
1916 1918
1917 bp->l_wqnext = NULL; 1919 bp->l_wqnext = NULL;
1918 bp->l_freelist = NULL; 1920 bp->l_freelist = NULL;
1919 1921
1920 bp->l_pn = pn; 1922 bp->l_pn = pn;
1921 bp->l_blkno = log->base + (pn << (L2LOGPSIZE - log->l2bsize)); 1923 bp->l_blkno = log->base + (pn << (L2LOGPSIZE - log->l2bsize));
1922 bp->l_ceor = 0; 1924 bp->l_ceor = 0;
1923 1925
1924 return bp; 1926 return bp;
1925 } 1927 }
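
Both lbmAllocate() and lbmWrite() map a log page number to a device block with base + (pn << (L2LOGPSIZE - l2bsize)). With 4KB log pages (L2LOGPSIZE = 12) on a 512-byte-block device (l2bsize = 9), each log page spans eight device blocks. A worked example with an illustrative base:

/* Worked example of the pn -> l_blkno mapping. */
#include <stdio.h>

int main(void)
{
	long base = 1000;	/* illustrative start of the log area */
	int l2bsize = 9, l2logpsize = 12, pn = 5;

	long blkno = base + ((long)pn << (l2logpsize - l2bsize));
	printf("page %d -> block %ld\n", pn, blkno);	/* 1000 + 40 = 1040 */
	return 0;
}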
1926 1928
1927 1929
1928 /* 1930 /*
1929 * lbmFree() 1931 * lbmFree()
1930 * 1932 *
1931 * release a log buffer to freelist 1933 * release a log buffer to freelist
1932 */ 1934 */
1933 static void lbmFree(struct lbuf * bp) 1935 static void lbmFree(struct lbuf * bp)
1934 { 1936 {
1935 unsigned long flags; 1937 unsigned long flags;
1936 1938
1937 LCACHE_LOCK(flags); 1939 LCACHE_LOCK(flags);
1938 1940
1939 lbmfree(bp); 1941 lbmfree(bp);
1940 1942
1941 LCACHE_UNLOCK(flags); 1943 LCACHE_UNLOCK(flags);
1942 } 1944 }
1943 1945
1944 static void lbmfree(struct lbuf * bp) 1946 static void lbmfree(struct lbuf * bp)
1945 { 1947 {
1946 struct jfs_log *log = bp->l_log; 1948 struct jfs_log *log = bp->l_log;
1947 1949
1948 assert(bp->l_wqnext == NULL); 1950 assert(bp->l_wqnext == NULL);
1949 1951
1950 /* 1952 /*
1951 * return the buffer to head of freelist 1953 * return the buffer to head of freelist
1952 */ 1954 */
1953 bp->l_freelist = log->lbuf_free; 1955 bp->l_freelist = log->lbuf_free;
1954 log->lbuf_free = bp; 1956 log->lbuf_free = bp;
1955 1957
1956 wake_up(&log->free_wait); 1958 wake_up(&log->free_wait);
1957 return; 1959 return;
1958 } 1960 }
1959 1961
1960 1962
1961 /* 1963 /*
1962 * NAME: lbmRedrive 1964 * NAME: lbmRedrive
1963 * 1965 *
1964 * FUNCTION: add a log buffer to the log redrive list 1966 * FUNCTION: add a log buffer to the log redrive list
1965 * 1967 *
1966 * PARAMETER: 1968 * PARAMETER:
1967 * bp - log buffer 1969 * bp - log buffer
1968 * 1970 *
1969 * NOTES: 1971 * NOTES:
1970 * Takes log_redrive_lock. 1972 * Takes log_redrive_lock.
1971 */ 1973 */
1972 static inline void lbmRedrive(struct lbuf *bp) 1974 static inline void lbmRedrive(struct lbuf *bp)
1973 { 1975 {
1974 unsigned long flags; 1976 unsigned long flags;
1975 1977
1976 spin_lock_irqsave(&log_redrive_lock, flags); 1978 spin_lock_irqsave(&log_redrive_lock, flags);
1977 bp->l_redrive_next = log_redrive_list; 1979 bp->l_redrive_next = log_redrive_list;
1978 log_redrive_list = bp; 1980 log_redrive_list = bp;
1979 spin_unlock_irqrestore(&log_redrive_lock, flags); 1981 spin_unlock_irqrestore(&log_redrive_lock, flags);
1980 1982
1981 wake_up(&jfs_IO_thread_wait); 1983 wake_up(&jfs_IO_thread_wait);
1982 } 1984 }
1983 1985
1984 1986
1985 /* 1987 /*
1986 * lbmRead() 1988 * lbmRead()
1987 */ 1989 */
1988 static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp) 1990 static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
1989 { 1991 {
1990 struct bio *bio; 1992 struct bio *bio;
1991 struct lbuf *bp; 1993 struct lbuf *bp;
1992 1994
1993 /* 1995 /*
1994 * allocate a log buffer 1996 * allocate a log buffer
1995 */ 1997 */
1996 *bpp = bp = lbmAllocate(log, pn); 1998 *bpp = bp = lbmAllocate(log, pn);
1997 jfs_info("lbmRead: bp:0x%p pn:0x%x", bp, pn); 1999 jfs_info("lbmRead: bp:0x%p pn:0x%x", bp, pn);
1998 2000
1999 bp->l_flag |= lbmREAD; 2001 bp->l_flag |= lbmREAD;
2000 2002
2001 bio = bio_alloc(GFP_NOFS, 1); 2003 bio = bio_alloc(GFP_NOFS, 1);
2002 2004
2003 bio->bi_sector = bp->l_blkno << (log->l2bsize - 9); 2005 bio->bi_sector = bp->l_blkno << (log->l2bsize - 9);
2004 bio->bi_bdev = log->bdev; 2006 bio->bi_bdev = log->bdev;
2005 bio->bi_io_vec[0].bv_page = bp->l_page; 2007 bio->bi_io_vec[0].bv_page = bp->l_page;
2006 bio->bi_io_vec[0].bv_len = LOGPSIZE; 2008 bio->bi_io_vec[0].bv_len = LOGPSIZE;
2007 bio->bi_io_vec[0].bv_offset = bp->l_offset; 2009 bio->bi_io_vec[0].bv_offset = bp->l_offset;
2008 2010
2009 bio->bi_vcnt = 1; 2011 bio->bi_vcnt = 1;
2010 bio->bi_idx = 0; 2012 bio->bi_idx = 0;
2011 bio->bi_size = LOGPSIZE; 2013 bio->bi_size = LOGPSIZE;
2012 2014
2013 bio->bi_end_io = lbmIODone; 2015 bio->bi_end_io = lbmIODone;
2014 bio->bi_private = bp; 2016 bio->bi_private = bp;
2015 submit_bio(READ_SYNC, bio); 2017 submit_bio(READ_SYNC, bio);
2016 2018
2017 wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD)); 2019 wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD));
2018 2020
2019 return 0; 2021 return 0;
2020 } 2022 }
2021 2023
2022 2024
2023 /* 2025 /*
2024 * lbmWrite() 2026 * lbmWrite()
2025 * 2027 *
2026 * buffer at head of pageout queue stays after completion of 2028 * buffer at head of pageout queue stays after completion of
2027 * partial-page pageout and redriven by explicit initiation of 2029 * partial-page pageout and redriven by explicit initiation of
2028 * pageout by caller until full-page pageout is completed and 2030 * pageout by caller until full-page pageout is completed and
2029 * released. 2031 * released.
2030 * 2032 *
2031 * device driver i/o done redrives pageout of new buffer at 2033 * device driver i/o done redrives pageout of new buffer at
2032 * head of pageout queue when current buffer at head of pageout 2034 * head of pageout queue when current buffer at head of pageout
2033 * queue is released at the completion of its full-page pageout. 2035 * queue is released at the completion of its full-page pageout.
2034 * 2036 *
2035 * LOGGC_LOCK() serializes lbmWrite() by lmNextPage() and lmGroupCommit(). 2037 * LOGGC_LOCK() serializes lbmWrite() by lmNextPage() and lmGroupCommit().
2036 * LCACHE_LOCK() serializes xflag between lbmWrite() and lbmIODone() 2038 * LCACHE_LOCK() serializes xflag between lbmWrite() and lbmIODone()
2037 */ 2039 */
2038 static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag, 2040 static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag,
2039 int cant_block) 2041 int cant_block)
2040 { 2042 {
2041 struct lbuf *tail; 2043 struct lbuf *tail;
2042 unsigned long flags; 2044 unsigned long flags;
2043 2045
2044 jfs_info("lbmWrite: bp:0x%p flag:0x%x pn:0x%x", bp, flag, bp->l_pn); 2046 jfs_info("lbmWrite: bp:0x%p flag:0x%x pn:0x%x", bp, flag, bp->l_pn);
2045 2047
2046 /* map the logical block address to physical block address */ 2048 /* map the logical block address to physical block address */
2047 bp->l_blkno = 2049 bp->l_blkno =
2048 log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize)); 2050 log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
2049 2051
2050 LCACHE_LOCK(flags); /* disable+lock */ 2052 LCACHE_LOCK(flags); /* disable+lock */
2051 2053
2052 /* 2054 /*
2053 * initialize buffer for device driver 2055 * initialize buffer for device driver
2054 */ 2056 */
2055 bp->l_flag = flag; 2057 bp->l_flag = flag;
2056 2058
2057 /* 2059 /*
2058 * insert bp at tail of write queue associated with log 2060 * insert bp at tail of write queue associated with log
2059 * 2061 *
2060 * (request is either for bp already/currently at head of queue 2062 * (request is either for bp already/currently at head of queue
2061 * or new bp to be inserted at tail) 2063 * or new bp to be inserted at tail)
2062 */ 2064 */
2063 tail = log->wqueue; 2065 tail = log->wqueue;
2064 2066
2065 /* is buffer not already on write queue ? */ 2067 /* is buffer not already on write queue ? */
2066 if (bp->l_wqnext == NULL) { 2068 if (bp->l_wqnext == NULL) {
2067 /* insert at tail of wqueue */ 2069 /* insert at tail of wqueue */
2068 if (tail == NULL) { 2070 if (tail == NULL) {
2069 log->wqueue = bp; 2071 log->wqueue = bp;
2070 bp->l_wqnext = bp; 2072 bp->l_wqnext = bp;
2071 } else { 2073 } else {
2072 log->wqueue = bp; 2074 log->wqueue = bp;
2073 bp->l_wqnext = tail->l_wqnext; 2075 bp->l_wqnext = tail->l_wqnext;
2074 tail->l_wqnext = bp; 2076 tail->l_wqnext = bp;
2075 } 2077 }
2076 2078
2077 tail = bp; 2079 tail = bp;
2078 } 2080 }
2079 2081
2080 /* is buffer at head of wqueue and for write ? */ 2082 /* is buffer at head of wqueue and for write ? */
2081 if ((bp != tail->l_wqnext) || !(flag & lbmWRITE)) { 2083 if ((bp != tail->l_wqnext) || !(flag & lbmWRITE)) {
2082 LCACHE_UNLOCK(flags); /* unlock+enable */ 2084 LCACHE_UNLOCK(flags); /* unlock+enable */
2083 return; 2085 return;
2084 } 2086 }
2085 2087
2086 LCACHE_UNLOCK(flags); /* unlock+enable */ 2088 LCACHE_UNLOCK(flags); /* unlock+enable */
2087 2089
2088 if (cant_block) 2090 if (cant_block)
2089 lbmRedrive(bp); 2091 lbmRedrive(bp);
2090 else if (flag & lbmSYNC) 2092 else if (flag & lbmSYNC)
2091 lbmStartIO(bp); 2093 lbmStartIO(bp);
2092 else { 2094 else {
2093 LOGGC_UNLOCK(log); 2095 LOGGC_UNLOCK(log);
2094 lbmStartIO(bp); 2096 lbmStartIO(bp);
2095 LOGGC_LOCK(log); 2097 LOGGC_LOCK(log);
2096 } 2098 }
2097 } 2099 }
2098 2100
2099 2101
2100 /* 2102 /*
2101 * lbmDirectWrite() 2103 * lbmDirectWrite()
2102 * 2104 *
2103 * initiate pageout bypassing write queue for sidestream 2105 * initiate pageout bypassing write queue for sidestream
2104 * (e.g., log superblock) write; 2106 * (e.g., log superblock) write;
2105 */ 2107 */
2106 static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag) 2108 static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag)
2107 { 2109 {
2108 jfs_info("lbmDirectWrite: bp:0x%p flag:0x%x pn:0x%x", 2110 jfs_info("lbmDirectWrite: bp:0x%p flag:0x%x pn:0x%x",
2109 bp, flag, bp->l_pn); 2111 bp, flag, bp->l_pn);
2110 2112
2111 /* 2113 /*
2112 * initialize buffer for device driver 2114 * initialize buffer for device driver
2113 */ 2115 */
2114 bp->l_flag = flag | lbmDIRECT; 2116 bp->l_flag = flag | lbmDIRECT;
2115 2117
2116 /* map the logical block address to physical block address */ 2118 /* map the logical block address to physical block address */
2117 bp->l_blkno = 2119 bp->l_blkno =
2118 log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize)); 2120 log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
2119 2121
2120 /* 2122 /*
2121 * initiate pageout of the page 2123 * initiate pageout of the page
2122 */ 2124 */
2123 lbmStartIO(bp); 2125 lbmStartIO(bp);
2124 } 2126 }
2125 2127
2126 2128
2127 /* 2129 /*
2128 * NAME: lbmStartIO() 2130 * NAME: lbmStartIO()
2129 * 2131 *
2130 * FUNCTION: Interface to DD strategy routine 2132 * FUNCTION: Interface to DD strategy routine
2131 * 2133 *
2132 * RETURN: none 2134 * RETURN: none
2133 * 2135 *
2134 * serialization: LCACHE_LOCK() is NOT held during log i/o; 2136 * serialization: LCACHE_LOCK() is NOT held during log i/o;
2135 */ 2137 */
2136 static void lbmStartIO(struct lbuf * bp) 2138 static void lbmStartIO(struct lbuf * bp)
2137 { 2139 {
2138 struct bio *bio; 2140 struct bio *bio;
2139 struct jfs_log *log = bp->l_log; 2141 struct jfs_log *log = bp->l_log;
2140 2142
2141 jfs_info("lbmStartIO\n"); 2143 jfs_info("lbmStartIO\n");
2142 2144
2143 bio = bio_alloc(GFP_NOFS, 1); 2145 bio = bio_alloc(GFP_NOFS, 1);
2144 bio->bi_sector = bp->l_blkno << (log->l2bsize - 9); 2146 bio->bi_sector = bp->l_blkno << (log->l2bsize - 9);
2145 bio->bi_bdev = log->bdev; 2147 bio->bi_bdev = log->bdev;
2146 bio->bi_io_vec[0].bv_page = bp->l_page; 2148 bio->bi_io_vec[0].bv_page = bp->l_page;
2147 bio->bi_io_vec[0].bv_len = LOGPSIZE; 2149 bio->bi_io_vec[0].bv_len = LOGPSIZE;
2148 bio->bi_io_vec[0].bv_offset = bp->l_offset; 2150 bio->bi_io_vec[0].bv_offset = bp->l_offset;
2149 2151
2150 bio->bi_vcnt = 1; 2152 bio->bi_vcnt = 1;
2151 bio->bi_idx = 0; 2153 bio->bi_idx = 0;
2152 bio->bi_size = LOGPSIZE; 2154 bio->bi_size = LOGPSIZE;
2153 2155
2154 bio->bi_end_io = lbmIODone; 2156 bio->bi_end_io = lbmIODone;
2155 bio->bi_private = bp; 2157 bio->bi_private = bp;
2156 2158
2157 /* check if journaling to disk has been disabled */ 2159 /* check if journaling to disk has been disabled */
2158 if (log->no_integrity) { 2160 if (log->no_integrity) {
2159 bio->bi_size = 0; 2161 bio->bi_size = 0;
2160 lbmIODone(bio, 0, 0); 2162 lbmIODone(bio, 0, 0);
2161 } else { 2163 } else {
2162 submit_bio(WRITE_SYNC, bio); 2164 submit_bio(WRITE_SYNC, bio);
2163 INCREMENT(lmStat.submitted); 2165 INCREMENT(lmStat.submitted);
2164 } 2166 }
2165 } 2167 }
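
One detail worth calling out in lbmStartIO(): l_blkno counts (1 << l2bsize)-byte file-system blocks, while bi_sector counts 512-byte sectors, so the address is rescaled by shifting left by l2bsize - 9 bits. A quick sketch of the arithmetic, assuming a 4 KiB block size:

    #include <stdint.h>
    #include <stdio.h>

    /* convert a block number in (1 << l2bsize)-byte units to 512-byte sectors */
    static uint64_t blk_to_sector(uint64_t blkno, int l2bsize)
    {
            return blkno << (l2bsize - 9);          /* 9 == log2(512) */
    }

    int main(void)
    {
            /* block 10 with 4 KiB blocks starts at byte 40960, i.e. sector 80 */
            printf("%llu\n", (unsigned long long)blk_to_sector(10, 12));
            return 0;
    }
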
2166 2168
2167 2169
2168 /* 2170 /*
2169 * lbmIOWait() 2171 * lbmIOWait()
2170 */ 2172 */
2171 static int lbmIOWait(struct lbuf * bp, int flag) 2173 static int lbmIOWait(struct lbuf * bp, int flag)
2172 { 2174 {
2173 unsigned long flags; 2175 unsigned long flags;
2174 int rc = 0; 2176 int rc = 0;
2175 2177
2176 jfs_info("lbmIOWait1: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag); 2178 jfs_info("lbmIOWait1: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
2177 2179
2178 LCACHE_LOCK(flags); /* disable+lock */ 2180 LCACHE_LOCK(flags); /* disable+lock */
2179 2181
2180 LCACHE_SLEEP_COND(bp->l_ioevent, (bp->l_flag & lbmDONE), flags); 2182 LCACHE_SLEEP_COND(bp->l_ioevent, (bp->l_flag & lbmDONE), flags);
2181 2183
2182 rc = (bp->l_flag & lbmERROR) ? -EIO : 0; 2184 rc = (bp->l_flag & lbmERROR) ? -EIO : 0;
2183 2185
2184 if (flag & lbmFREE) 2186 if (flag & lbmFREE)
2185 lbmfree(bp); 2187 lbmfree(bp);
2186 2188
2187 LCACHE_UNLOCK(flags); /* unlock+enable */ 2189 LCACHE_UNLOCK(flags); /* unlock+enable */
2188 2190
2189 jfs_info("lbmIOWait2: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag); 2191 jfs_info("lbmIOWait2: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
2190 return rc; 2192 return rc;
2191 } 2193 }
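
LCACHE_SLEEP_COND() is defined outside this hunk, so only its effect is visible here: atomically release the cache lock, sleep until the condition becomes true, and reacquire. A hypothetical user-space rendering of that pattern with pthreads (done models bp->l_flag & lbmDONE; none of these names are JFS symbols):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t ioevent = PTHREAD_COND_INITIALIZER;
    static bool done;                       /* models bp->l_flag & lbmDONE */

    static void sleep_cond(void)            /* models LCACHE_SLEEP_COND() */
    {
            pthread_mutex_lock(&lock);
            while (!done)                   /* re-check after every wakeup */
                    pthread_cond_wait(&ioevent, &lock);
            pthread_mutex_unlock(&lock);
    }

    static void *waker(void *arg)           /* models lbmIODone()'s wakeup */
    {
            (void)arg;
            pthread_mutex_lock(&lock);
            done = true;
            pthread_cond_signal(&ioevent);  /* LCACHE_WAKEUP() analogue */
            pthread_mutex_unlock(&lock);
            return NULL;
    }

    int main(void)
    {
            pthread_t t;

            pthread_create(&t, NULL, waker, NULL);
            sleep_cond();                   /* returns once done is set */
            pthread_join(t, NULL);
            puts("I/O complete");
            return 0;
    }
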
2192 2194
2193 /* 2195 /*
2194 * lbmIODone() 2196 * lbmIODone()
2195 * 2197 *
2196 * executed at INTIODONE level 2198 * executed at INTIODONE level
2197 */ 2199 */
2198 static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error) 2200 static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error)
2199 { 2201 {
2200 struct lbuf *bp = bio->bi_private; 2202 struct lbuf *bp = bio->bi_private;
2201 struct lbuf *nextbp, *tail; 2203 struct lbuf *nextbp, *tail;
2202 struct jfs_log *log; 2204 struct jfs_log *log;
2203 unsigned long flags; 2205 unsigned long flags;
2204 2206
2205 if (bio->bi_size) 2207 if (bio->bi_size)
2206 return 1; 2208 return 1;
2207 2209
2208 /* 2210 /*
2209 * get back jfs buffer bound to the i/o buffer 2211 * get back jfs buffer bound to the i/o buffer
2210 */ 2212 */
2211 jfs_info("lbmIODone: bp:0x%p flag:0x%x", bp, bp->l_flag); 2213 jfs_info("lbmIODone: bp:0x%p flag:0x%x", bp, bp->l_flag);
2212 2214
2213 LCACHE_LOCK(flags); /* disable+lock */ 2215 LCACHE_LOCK(flags); /* disable+lock */
2214 2216
2215 bp->l_flag |= lbmDONE; 2217 bp->l_flag |= lbmDONE;
2216 2218
2217 if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { 2219 if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
2218 bp->l_flag |= lbmERROR; 2220 bp->l_flag |= lbmERROR;
2219 2221
2220 jfs_err("lbmIODone: I/O error in JFS log"); 2222 jfs_err("lbmIODone: I/O error in JFS log");
2221 } 2223 }
2222 2224
2223 bio_put(bio); 2225 bio_put(bio);
2224 2226
2225 /* 2227 /*
2226 * pagein completion 2228 * pagein completion
2227 */ 2229 */
2228 if (bp->l_flag & lbmREAD) { 2230 if (bp->l_flag & lbmREAD) {
2229 bp->l_flag &= ~lbmREAD; 2231 bp->l_flag &= ~lbmREAD;
2230 2232
2231 LCACHE_UNLOCK(flags); /* unlock+enable */ 2233 LCACHE_UNLOCK(flags); /* unlock+enable */
2232 2234
2233 /* wakeup I/O initiator */ 2235 /* wakeup I/O initiator */
2234 LCACHE_WAKEUP(&bp->l_ioevent); 2236 LCACHE_WAKEUP(&bp->l_ioevent);
2235 2237
2236 return 0; 2238 return 0;
2237 } 2239 }
2238 2240
2239 /* 2241 /*
2240 * pageout completion 2242 * pageout completion
2241 * 2243 *
2242 * the bp at the head of write queue has completed pageout. 2244 * the bp at the head of write queue has completed pageout.
2243 * 2245 *
2244 * if single-commit/full-page pageout, remove the current buffer 2246 * if single-commit/full-page pageout, remove the current buffer
2245 * from head of pageout queue, and redrive pageout with 2247 * from head of pageout queue, and redrive pageout with
2246 * the new buffer at head of pageout queue; 2248 * the new buffer at head of pageout queue;
2247 * otherwise, the partial-page pageout buffer stays at 2249 * otherwise, the partial-page pageout buffer stays at
2248 * the head of pageout queue to be redriven for pageout 2250 * the head of pageout queue to be redriven for pageout
2249 * by lmGroupCommit() until full-page pageout is completed. 2251 * by lmGroupCommit() until full-page pageout is completed.
2250 */ 2252 */
2251 bp->l_flag &= ~lbmWRITE; 2253 bp->l_flag &= ~lbmWRITE;
2252 INCREMENT(lmStat.pagedone); 2254 INCREMENT(lmStat.pagedone);
2253 2255
2254 /* update committed lsn */ 2256 /* update committed lsn */
2255 log = bp->l_log; 2257 log = bp->l_log;
2256 log->clsn = (bp->l_pn << L2LOGPSIZE) + bp->l_ceor; 2258 log->clsn = (bp->l_pn << L2LOGPSIZE) + bp->l_ceor;
2257 2259
2258 if (bp->l_flag & lbmDIRECT) { 2260 if (bp->l_flag & lbmDIRECT) {
2259 LCACHE_WAKEUP(&bp->l_ioevent); 2261 LCACHE_WAKEUP(&bp->l_ioevent);
2260 LCACHE_UNLOCK(flags); 2262 LCACHE_UNLOCK(flags);
2261 return 0; 2263 return 0;
2262 } 2264 }
2263 2265
2264 tail = log->wqueue; 2266 tail = log->wqueue;
2265 2267
2266 /* single element queue */ 2268 /* single element queue */
2267 if (bp == tail) { 2269 if (bp == tail) {
2268 /* remove head buffer of full-page pageout 2270 /* remove head buffer of full-page pageout
2269 * from log device write queue 2271 * from log device write queue
2270 */ 2272 */
2271 if (bp->l_flag & lbmRELEASE) { 2273 if (bp->l_flag & lbmRELEASE) {
2272 log->wqueue = NULL; 2274 log->wqueue = NULL;
2273 bp->l_wqnext = NULL; 2275 bp->l_wqnext = NULL;
2274 } 2276 }
2275 } 2277 }
2276 /* multi element queue */ 2278 /* multi element queue */
2277 else { 2279 else {
2278 /* remove head buffer of full-page pageout 2280 /* remove head buffer of full-page pageout
2279 * from log device write queue 2281 * from log device write queue
2280 */ 2282 */
2281 if (bp->l_flag & lbmRELEASE) { 2283 if (bp->l_flag & lbmRELEASE) {
2282 nextbp = tail->l_wqnext = bp->l_wqnext; 2284 nextbp = tail->l_wqnext = bp->l_wqnext;
2283 bp->l_wqnext = NULL; 2285 bp->l_wqnext = NULL;
2284 2286
2285 /* 2287 /*
2286 * redrive pageout of next page at head of write queue: 2288 * redrive pageout of next page at head of write queue:
2287 * redrive next page without any bound tblk 2289 * redrive next page without any bound tblk
2288 * (i.e., page w/o any COMMIT records), or 2290 * (i.e., page w/o any COMMIT records), or
2289 * first page of new group commit which has been 2291 * first page of new group commit which has been
2290 * queued after current page (subsequent pageout 2292 * queued after current page (subsequent pageout
2291 * is performed synchronously, except page without 2293 * is performed synchronously, except page without
2292 * any COMMITs) by lmGroupCommit() as indicated 2294 * any COMMITs) by lmGroupCommit() as indicated
2293 * by lbmWRITE flag; 2295 * by lbmWRITE flag;
2294 */ 2296 */
2295 if (nextbp->l_flag & lbmWRITE) { 2297 if (nextbp->l_flag & lbmWRITE) {
2296 /* 2298 /*
2297 * We can't do the I/O at interrupt time. 2299 * We can't do the I/O at interrupt time.
2298 * The jfsIO thread can do it 2300 * The jfsIO thread can do it
2299 */ 2301 */
2300 lbmRedrive(nextbp); 2302 lbmRedrive(nextbp);
2301 } 2303 }
2302 } 2304 }
2303 } 2305 }
2304 2306
2305 /* 2307 /*
2306 * synchronous pageout: 2308 * synchronous pageout:
2307 * 2309 *
2308 * buffer has not necessarily been removed from write queue 2310 * buffer has not necessarily been removed from write queue
2309 * (e.g., synchronous write of partial-page with COMMIT): 2311 * (e.g., synchronous write of partial-page with COMMIT):
2310 * leave buffer for i/o initiator to dispose 2312 * leave buffer for i/o initiator to dispose
2311 */ 2313 */
2312 if (bp->l_flag & lbmSYNC) { 2314 if (bp->l_flag & lbmSYNC) {
2313 LCACHE_UNLOCK(flags); /* unlock+enable */ 2315 LCACHE_UNLOCK(flags); /* unlock+enable */
2314 2316
2315 /* wakeup I/O initiator */ 2317 /* wakeup I/O initiator */
2316 LCACHE_WAKEUP(&bp->l_ioevent); 2318 LCACHE_WAKEUP(&bp->l_ioevent);
2317 } 2319 }
2318 2320
2319 /* 2321 /*
2320 * Group Commit pageout: 2322 * Group Commit pageout:
2321 */ 2323 */
2322 else if (bp->l_flag & lbmGC) { 2324 else if (bp->l_flag & lbmGC) {
2323 LCACHE_UNLOCK(flags); 2325 LCACHE_UNLOCK(flags);
2324 lmPostGC(bp); 2326 lmPostGC(bp);
2325 } 2327 }
2326 2328
2327 /* 2329 /*
2328 * asynchronous pageout: 2330 * asynchronous pageout:
2329 * 2331 *
2330 * buffer must have been removed from write queue: 2332 * buffer must have been removed from write queue:
2331 * insert buffer at head of freelist where it can be recycled 2333 * insert buffer at head of freelist where it can be recycled
2332 */ 2334 */
2333 else { 2335 else {
2334 assert(bp->l_flag & lbmRELEASE); 2336 assert(bp->l_flag & lbmRELEASE);
2335 assert(bp->l_flag & lbmFREE); 2337 assert(bp->l_flag & lbmFREE);
2336 lbmfree(bp); 2338 lbmfree(bp);
2337 2339
2338 LCACHE_UNLOCK(flags); /* unlock+enable */ 2340 LCACHE_UNLOCK(flags); /* unlock+enable */
2339 } 2341 }
2340 2342
2341 return 0; 2343 return 0;
2342 } 2344 }
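
The two lbmRELEASE branches above are the removal side of the circular queue sketched after lbmWrite(): a single-element queue is emptied outright, while a multi-element queue has its tail pointed past the old head, and the new head becomes a redrive candidate. In terms of the earlier model:

    struct node { int pn; struct node *wqnext; };   /* as in the earlier sketch */

    /* unlink head from the circular tail-pointer queue; return the new head
     * (the next buffer to redrive) or NULL if the queue is now empty */
    static struct node *queue_pop_head(struct node **tail, struct node *head)
    {
            if (head == *tail) {                    /* single-element queue */
                    *tail = NULL;
                    head->wqnext = NULL;
                    return NULL;
            }
            (*tail)->wqnext = head->wqnext;         /* bypass the old head */
            head->wqnext = NULL;
            return (*tail)->wqnext;                 /* new head of queue */
    }

In lbmIODone() the new head is handed to lbmRedrive() rather than written directly, because the completion runs in interrupt context.
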
2343 2345
2344 int jfsIOWait(void *arg) 2346 int jfsIOWait(void *arg)
2345 { 2347 {
2346 struct lbuf *bp; 2348 struct lbuf *bp;
2347 2349
2348 daemonize("jfsIO"); 2350 daemonize("jfsIO");
2349 2351
2350 complete(&jfsIOwait); 2352 complete(&jfsIOwait);
2351 2353
2352 do { 2354 do {
2353 DECLARE_WAITQUEUE(wq, current); 2355 DECLARE_WAITQUEUE(wq, current);
2354 2356
2355 spin_lock_irq(&log_redrive_lock); 2357 spin_lock_irq(&log_redrive_lock);
2356 while ((bp = log_redrive_list) != 0) { 2358 while ((bp = log_redrive_list) != 0) {
2357 log_redrive_list = bp->l_redrive_next; 2359 log_redrive_list = bp->l_redrive_next;
2358 bp->l_redrive_next = NULL; 2360 bp->l_redrive_next = NULL;
2359 spin_unlock_irq(&log_redrive_lock); 2361 spin_unlock_irq(&log_redrive_lock);
2360 lbmStartIO(bp); 2362 lbmStartIO(bp);
2361 spin_lock_irq(&log_redrive_lock); 2363 spin_lock_irq(&log_redrive_lock);
2362 } 2364 }
2363 if (freezing(current)) { 2365 if (freezing(current)) {
2364 spin_unlock_irq(&log_redrive_lock); 2366 spin_unlock_irq(&log_redrive_lock);
2365 refrigerator(); 2367 refrigerator();
2366 } else { 2368 } else {
2367 add_wait_queue(&jfs_IO_thread_wait, &wq); 2369 add_wait_queue(&jfs_IO_thread_wait, &wq);
2368 set_current_state(TASK_INTERRUPTIBLE); 2370 set_current_state(TASK_INTERRUPTIBLE);
2369 spin_unlock_irq(&log_redrive_lock); 2371 spin_unlock_irq(&log_redrive_lock);
2370 schedule(); 2372 schedule();
2371 current->state = TASK_RUNNING; 2373 current->state = TASK_RUNNING;
2372 remove_wait_queue(&jfs_IO_thread_wait, &wq); 2374 remove_wait_queue(&jfs_IO_thread_wait, &wq);
2373 } 2375 }
2374 } while (!jfs_stop_threads); 2376 } while (!jfs_stop_threads);
2375 2377
2376 jfs_info("jfsIOWait being killed!"); 2378 jfs_info("jfsIOWait being killed!");
2377 complete_and_exit(&jfsIOwait, 0); 2379 complete_and_exit(&jfsIOwait, 0);
2378 } 2380 }
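
lbmRedrive() itself falls outside this hunk, but the consumer loop above pins down its shape: since jfsIOWait() pops buffers off log_redrive_list under log_redrive_lock and sleeps on jfs_IO_thread_wait, the producer side plausibly amounts to a locked push onto the list followed by a wakeup, roughly:

    /* sketch of the producer side implied by jfsIOWait(); not part of
     * this diff -- the real lbmRedrive() may differ in detail */
    static void lbmRedrive(struct lbuf *bp)
    {
            unsigned long flags;

            spin_lock_irqsave(&log_redrive_lock, flags);
            bp->l_redrive_next = log_redrive_list;  /* push at list head */
            log_redrive_list = bp;
            spin_unlock_irqrestore(&log_redrive_lock, flags);

            wake_up(&jfs_IO_thread_wait);           /* kick the jfsIO thread */
    }
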
2379 2381
2380 /* 2382 /*
2381 * NAME: lmLogFormat()/jfs_logform() 2383 * NAME: lmLogFormat()/jfs_logform()
2382 * 2384 *
2383 * FUNCTION: format file system log 2385 * FUNCTION: format file system log
2384 * 2386 *
2385 * PARAMETERS: 2387 * PARAMETERS:
2386 * log - volume log 2388 * log - volume log
2387 * logAddress - start address of log space in FS block 2389 * logAddress - start address of log space in FS block
2388 * logSize - length of log space in FS block; 2390 * logSize - length of log space in FS block;
2389 * 2391 *
2390 * RETURN: 0 - success 2392 * RETURN: 0 - success
2391 * -EIO - i/o error 2393 * -EIO - i/o error
2392 * 2394 *
2393 * XXX: We're synchronously writing one page at a time. This needs to 2395 * XXX: We're synchronously writing one page at a time. This needs to
2394 * be improved by writing multiple pages at once. 2396 * be improved by writing multiple pages at once.
2395 */ 2397 */
2396 int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize) 2398 int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
2397 { 2399 {
2398 int rc = -EIO; 2400 int rc = -EIO;
2399 struct jfs_sb_info *sbi; 2401 struct jfs_sb_info *sbi;
2400 struct logsuper *logsuper; 2402 struct logsuper *logsuper;
2401 struct logpage *lp; 2403 struct logpage *lp;
2402 int lspn; /* log sequence page number */ 2404 int lspn; /* log sequence page number */
2403 struct lrd *lrd_ptr; 2405 struct lrd *lrd_ptr;
2404 int npages = 0; 2406 int npages = 0;
2405 struct lbuf *bp; 2407 struct lbuf *bp;
2406 2408
2407 jfs_info("lmLogFormat: logAddress:%Ld logSize:%d", 2409 jfs_info("lmLogFormat: logAddress:%Ld logSize:%d",
2408 (long long)logAddress, logSize); 2410 (long long)logAddress, logSize);
2409 2411
2410 sbi = list_entry(log->sb_list.next, struct jfs_sb_info, log_list); 2412 sbi = list_entry(log->sb_list.next, struct jfs_sb_info, log_list);
2411 2413
2412 /* allocate a log buffer */ 2414 /* allocate a log buffer */
2413 bp = lbmAllocate(log, 1); 2415 bp = lbmAllocate(log, 1);
2414 2416
2415 npages = logSize >> sbi->l2nbperpage; 2417 npages = logSize >> sbi->l2nbperpage;
2416 2418
2417 /* 2419 /*
2418 * log space: 2420 * log space:
2419 * 2421 *
2420 * page 0 - reserved; 2422 * page 0 - reserved;
2421 * page 1 - log superblock; 2423 * page 1 - log superblock;
2422 * page 2 - log data page: A SYNC log record is written 2424 * page 2 - log data page: A SYNC log record is written
2423 * into this page at logform time; 2425 * into this page at logform time;
2424 * pages 3-N - log data page: set to empty log data pages; 2426 * pages 3-N - log data page: set to empty log data pages;
2425 */ 2427 */
2426 /* 2428 /*
2427 * init log superblock: log page 1 2429 * init log superblock: log page 1
2428 */ 2430 */
2429 logsuper = (struct logsuper *) bp->l_ldata; 2431 logsuper = (struct logsuper *) bp->l_ldata;
2430 2432
2431 logsuper->magic = cpu_to_le32(LOGMAGIC); 2433 logsuper->magic = cpu_to_le32(LOGMAGIC);
2432 logsuper->version = cpu_to_le32(LOGVERSION); 2434 logsuper->version = cpu_to_le32(LOGVERSION);
2433 logsuper->state = cpu_to_le32(LOGREDONE); 2435 logsuper->state = cpu_to_le32(LOGREDONE);
2434 logsuper->flag = cpu_to_le32(sbi->mntflag); /* ? */ 2436 logsuper->flag = cpu_to_le32(sbi->mntflag); /* ? */
2435 logsuper->size = cpu_to_le32(npages); 2437 logsuper->size = cpu_to_le32(npages);
2436 logsuper->bsize = cpu_to_le32(sbi->bsize); 2438 logsuper->bsize = cpu_to_le32(sbi->bsize);
2437 logsuper->l2bsize = cpu_to_le32(sbi->l2bsize); 2439 logsuper->l2bsize = cpu_to_le32(sbi->l2bsize);
2438 logsuper->end = cpu_to_le32(2 * LOGPSIZE + LOGPHDRSIZE + LOGRDSIZE); 2440 logsuper->end = cpu_to_le32(2 * LOGPSIZE + LOGPHDRSIZE + LOGRDSIZE);
2439 2441
2440 bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT; 2442 bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2441 bp->l_blkno = logAddress + sbi->nbperpage; 2443 bp->l_blkno = logAddress + sbi->nbperpage;
2442 lbmStartIO(bp); 2444 lbmStartIO(bp);
2443 if ((rc = lbmIOWait(bp, 0))) 2445 if ((rc = lbmIOWait(bp, 0)))
2444 goto exit; 2446 goto exit;
2445 2447
2446 /* 2448 /*
2447 * init pages 2 to npages-1 as log data pages: 2449 * init pages 2 to npages-1 as log data pages:
2448 * 2450 *
2449 * log page sequence number (lpsn) initialization: 2451 * log page sequence number (lpsn) initialization:
2450 * 2452 *
2451 * pn: 0 1 2 3 n-1 2453 * pn: 0 1 2 3 n-1
2452 * +-----+-----+=====+=====+===.....===+=====+ 2454 * +-----+-----+=====+=====+===.....===+=====+
2453 * lspn: N-1 0 1 N-2 2455 * lspn: N-1 0 1 N-2
2454 * <--- N page circular file ----> 2456 * <--- N page circular file ---->
2455 * 2457 *
2456 * the N (= npages-2) data pages of the log are maintained as 2458 * the N (= npages-2) data pages of the log are maintained as
2457 * a circular file for the log records; 2459 * a circular file for the log records;
2458 * lpsn grows by 1 monotonically as each log page is written 2460 * lpsn grows by 1 monotonically as each log page is written
2459 * to the circular file of the log; 2461 * to the circular file of the log;
2460 * and setLogpage() will not reset the page number even if 2462 * and setLogpage() will not reset the page number even if
2461 * the eor is equal to LOGPHDRSIZE. In order for binary search 2463 * the eor is equal to LOGPHDRSIZE. In order for binary search
2462 * to still work in the find-log-end process, we have to simulate the 2464 * to still work in the find-log-end process, we have to simulate the
2463 * log wrap situation at the log format time. 2465 * log wrap situation at the log format time.
2464 * The 1st log page written will have the highest lpsn. Then 2466 * The 1st log page written will have the highest lpsn. Then
2465 * the succeeding log pages will have ascending order of 2467 * the succeeding log pages will have ascending order of
2466 * the lspn starting from 0, ... (N-2) 2468 * the lspn starting from 0, ... (N-2)
2467 */ 2469 */
2468 lp = (struct logpage *) bp->l_ldata; 2470 lp = (struct logpage *) bp->l_ldata;
2469 /* 2471 /*
2470 * initialize 1st log page to be written: lpsn = N - 1, 2472 * initialize 1st log page to be written: lpsn = N - 1,
2471 * a SYNCPT log record is written into this page 2473 * a SYNCPT log record is written into this page
2472 */ 2474 */
2473 lp->h.page = lp->t.page = cpu_to_le32(npages - 3); 2475 lp->h.page = lp->t.page = cpu_to_le32(npages - 3);
2474 lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE + LOGRDSIZE); 2476 lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE + LOGRDSIZE);
2475 2477
2476 lrd_ptr = (struct lrd *) &lp->data; 2478 lrd_ptr = (struct lrd *) &lp->data;
2477 lrd_ptr->logtid = 0; 2479 lrd_ptr->logtid = 0;
2478 lrd_ptr->backchain = 0; 2480 lrd_ptr->backchain = 0;
2479 lrd_ptr->type = cpu_to_le16(LOG_SYNCPT); 2481 lrd_ptr->type = cpu_to_le16(LOG_SYNCPT);
2480 lrd_ptr->length = 0; 2482 lrd_ptr->length = 0;
2481 lrd_ptr->log.syncpt.sync = 0; 2483 lrd_ptr->log.syncpt.sync = 0;
2482 2484
2483 bp->l_blkno += sbi->nbperpage; 2485 bp->l_blkno += sbi->nbperpage;
2484 bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT; 2486 bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2485 lbmStartIO(bp); 2487 lbmStartIO(bp);
2486 if ((rc = lbmIOWait(bp, 0))) 2488 if ((rc = lbmIOWait(bp, 0)))
2487 goto exit; 2489 goto exit;
2488 2490
2489 /* 2491 /*
2490 * initialize succeeding log pages: lpsn = 0, 1, ..., (N-2) 2492 * initialize succeeding log pages: lpsn = 0, 1, ..., (N-2)
2491 */ 2493 */
2492 for (lspn = 0; lspn < npages - 3; lspn++) { 2494 for (lspn = 0; lspn < npages - 3; lspn++) {
2493 lp->h.page = lp->t.page = cpu_to_le32(lspn); 2495 lp->h.page = lp->t.page = cpu_to_le32(lspn);
2494 lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE); 2496 lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);
2495 2497
2496 bp->l_blkno += sbi->nbperpage; 2498 bp->l_blkno += sbi->nbperpage;
2497 bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT; 2499 bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2498 lbmStartIO(bp); 2500 lbmStartIO(bp);
2499 if ((rc = lbmIOWait(bp, 0))) 2501 if ((rc = lbmIOWait(bp, 0)))
2500 goto exit; 2502 goto exit;
2501 } 2503 }
2502 2504
2503 rc = 0; 2505 rc = 0;
2504 exit: 2506 exit:
2505 /* 2507 /*
2506 * finalize log 2508 * finalize log
2507 */ 2509 */
2508 /* release the buffer */ 2510 /* release the buffer */
2509 lbmFree(bp); 2511 lbmFree(bp);
2510 2512
2511 return rc; 2513 return rc;
2512 } 2514 }
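
The simulated wrap is easiest to see with concrete numbers. With, say, npages = 16 there are N = npages - 2 = 14 data pages (pn 2..15): page 2 is written with lpsn = N - 1 = 13 and pages 3..15 with lpsn = 0..12, so the find-log-end binary search sees exactly one descending step, just as after a genuine wrap. A sketch that prints the sequence the code above writes:

    #include <stdio.h>

    int main(void)
    {
            int npages = 16;        /* assumed log size in 4 KiB pages */
            int lspn;

            /* first data page (pn 2) gets the highest lpsn, N - 1 */
            printf("pn 2: lpsn %d\n", npages - 3);

            /* succeeding pages (pn 3..npages-1) get lpsn 0..N-2 */
            for (lspn = 0; lspn < npages - 3; lspn++)
                    printf("pn %d: lpsn %d\n", lspn + 3, lspn);
            return 0;
    }
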
2513 2515
2514 #ifdef CONFIG_JFS_STATISTICS 2516 #ifdef CONFIG_JFS_STATISTICS
2515 int jfs_lmstats_read(char *buffer, char **start, off_t offset, int length, 2517 int jfs_lmstats_read(char *buffer, char **start, off_t offset, int length,
2516 int *eof, void *data) 2518 int *eof, void *data)
2517 { 2519 {
2518 int len = 0; 2520 int len = 0;
2519 off_t begin; 2521 off_t begin;
2520 2522
2521 len += sprintf(buffer, 2523 len += sprintf(buffer,
2522 "JFS Logmgr stats\n" 2524 "JFS Logmgr stats\n"
2523 "================\n" 2525 "================\n"
2524 "commits = %d\n" 2526 "commits = %d\n"
2525 "writes submitted = %d\n" 2527 "writes submitted = %d\n"
2526 "writes completed = %d\n" 2528 "writes completed = %d\n"
2527 "full pages submitted = %d\n" 2529 "full pages submitted = %d\n"
2528 "partial pages submitted = %d\n", 2530 "partial pages submitted = %d\n",
2529 lmStat.commit, 2531 lmStat.commit,
2530 lmStat.submitted, 2532 lmStat.submitted,
2531 lmStat.pagedone, 2533 lmStat.pagedone,
2532 lmStat.full_page, 2534 lmStat.full_page,
2533 lmStat.partial_page); 2535 lmStat.partial_page);
2534 2536
2535 begin = offset; 2537 begin = offset;
2536 *start = buffer + begin; 2538 *start = buffer + begin;
2537 len -= begin; 2539 len -= begin;
2538 2540
2539 if (len > length) 2541 if (len > length)
2540 len = length; 2542 len = length;
2541 else 2543 else
2542 *eof = 1; 2544 *eof = 1;
2543 2545
2544 if (len < 0) 2546 if (len < 0)
1 /* 1 /*
2 * Copyright (C) International Business Machines Corp., 2000-2004 2 * Copyright (C) International Business Machines Corp., 2000-2004
3 * Portions Copyright (C) Christoph Hellwig, 2001-2002 3 * Portions Copyright (C) Christoph Hellwig, 2001-2002
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or 7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version. 8 * (at your option) any later version.
9 * 9 *
10 * This program is distributed in the hope that it will be useful, 10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
13 * the GNU General Public License for more details. 13 * the GNU General Public License for more details.
14 * 14 *
15 * You should have received a copy of the GNU General Public License 15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software 16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */ 18 */
19 #ifndef _H_JFS_LOGMGR 19 #ifndef _H_JFS_LOGMGR
20 #define _H_JFS_LOGMGR 20 #define _H_JFS_LOGMGR
21 21
22 #include "jfs_filsys.h" 22 #include "jfs_filsys.h"
23 #include "jfs_lock.h" 23 #include "jfs_lock.h"
24 24
25 /* 25 /*
26 * log manager configuration parameters 26 * log manager configuration parameters
27 */ 27 */
28 28
29 /* log page size */ 29 /* log page size */
30 #define LOGPSIZE 4096 30 #define LOGPSIZE 4096
31 #define L2LOGPSIZE 12 31 #define L2LOGPSIZE 12
32 32
33 #define LOGPAGES 16 /* Log pages per mounted file system */ 33 #define LOGPAGES 16 /* Log pages per mounted file system */
34 34
35 /* 35 /*
36 * log logical volume 36 * log logical volume
37 * 37 *
38 * a log is used to make the commit operation on journalled 38 * a log is used to make the commit operation on journalled
39 * files within the same logical volume group atomic. 39 * files within the same logical volume group atomic.
40 * a log is implemented with a logical volume. 40 * a log is implemented with a logical volume.
41 * there is one log per logical volume group. 41 * there is one log per logical volume group.
42 * 42 *
43 * block 0 of the log logical volume is not used (ipl etc). 43 * block 0 of the log logical volume is not used (ipl etc).
44 * block 1 contains a log "superblock" and is used by logFormat(), 44 * block 1 contains a log "superblock" and is used by logFormat(),
45 * lmLogInit(), lmLogShutdown(), and logRedo() to record status 45 * lmLogInit(), lmLogShutdown(), and logRedo() to record status
46 * of the log but is not otherwise used during normal processing. 46 * of the log but is not otherwise used during normal processing.
47 * blocks 2 - (N-1) are used to contain log records. 47 * blocks 2 - (N-1) are used to contain log records.
48 * 48 *
49 * when a volume group is varied-on-line, logRedo() must have 49 * when a volume group is varied-on-line, logRedo() must have
50 * been executed before the file systems (logical volumes) in 50 * been executed before the file systems (logical volumes) in
51 * the volume group can be mounted. 51 * the volume group can be mounted.
52 */ 52 */
53 /* 53 /*
54 * log superblock (block 1 of logical volume) 54 * log superblock (block 1 of logical volume)
55 */ 55 */
56 #define LOGSUPER_B 1 56 #define LOGSUPER_B 1
57 #define LOGSTART_B 2 57 #define LOGSTART_B 2
58 58
59 #define LOGMAGIC 0x87654321 59 #define LOGMAGIC 0x87654321
60 #define LOGVERSION 1 60 #define LOGVERSION 1
61 61
62 #define MAX_ACTIVE 128 /* Max active file systems sharing log */ 62 #define MAX_ACTIVE 128 /* Max active file systems sharing log */
63 63
64 struct logsuper { 64 struct logsuper {
65 __le32 magic; /* 4: log lv identifier */ 65 __le32 magic; /* 4: log lv identifier */
66 __le32 version; /* 4: version number */ 66 __le32 version; /* 4: version number */
67 __le32 serial; /* 4: log open/mount counter */ 67 __le32 serial; /* 4: log open/mount counter */
68 __le32 size; /* 4: size in number of LOGPSIZE blocks */ 68 __le32 size; /* 4: size in number of LOGPSIZE blocks */
69 __le32 bsize; /* 4: logical block size in byte */ 69 __le32 bsize; /* 4: logical block size in byte */
70 __le32 l2bsize; /* 4: log2 of bsize */ 70 __le32 l2bsize; /* 4: log2 of bsize */
71 71
72 __le32 flag; /* 4: option */ 72 __le32 flag; /* 4: option */
73 __le32 state; /* 4: state - see below */ 73 __le32 state; /* 4: state - see below */
74 74
75 __le32 end; /* 4: addr of last log record set by logredo */ 75 __le32 end; /* 4: addr of last log record set by logredo */
76 char uuid[16]; /* 16: 128-bit journal uuid */ 76 char uuid[16]; /* 16: 128-bit journal uuid */
77 char label[16]; /* 16: journal label */ 77 char label[16]; /* 16: journal label */
78 struct { 78 struct {
79 char uuid[16]; 79 char uuid[16];
80 } active[MAX_ACTIVE]; /* 2048: active file systems list */ 80 } active[MAX_ACTIVE]; /* 2048: active file systems list */
81 }; 81 };
82 82
83 #define NULL_UUID "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" 83 #define NULL_UUID "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
84 84
85 /* log flag: commit option (see jfs_filsys.h) */ 85 /* log flag: commit option (see jfs_filsys.h) */
86 86
87 /* log state */ 87 /* log state */
88 #define LOGMOUNT 0 /* log mounted by lmLogInit() */ 88 #define LOGMOUNT 0 /* log mounted by lmLogInit() */
89 #define LOGREDONE 1 /* log shutdown by lmLogShutdown(). 89 #define LOGREDONE 1 /* log shutdown by lmLogShutdown().
90 * log redo completed by logredo(). 90 * log redo completed by logredo().
91 */ 91 */
92 #define LOGWRAP 2 /* log wrapped */ 92 #define LOGWRAP 2 /* log wrapped */
93 #define LOGREADERR 3 /* log read error detected in logredo() */ 93 #define LOGREADERR 3 /* log read error detected in logredo() */
94 94
95 95
96 /* 96 /*
97 * log logical page 97 * log logical page
98 * 98 *
99 * (this comment should be rewritten !) 99 * (this comment should be rewritten !)
100 * the header and trailer structures (h,t) will normally have 100 * the header and trailer structures (h,t) will normally have
101 * the same page and eor value. 101 * the same page and eor value.
102 * An exception to this occurs when a complete page write is not 102 * An exception to this occurs when a complete page write is not
103 * accomplished on a power failure. Since the hardware may "split write" 103 * accomplished on a power failure. Since the hardware may "split write"
104 * sectors in the page, any out of order sequence may occur during powerfail 104 * sectors in the page, any out of order sequence may occur during powerfail
105 * and needs to be recognized during log replay. The xor value is 105 * and needs to be recognized during log replay. The xor value is
106 * an "exclusive or" of all log words in the page up to eor. This 106 * an "exclusive or" of all log words in the page up to eor. This
107 * 32 bit eor is stored with the top 16 bits in the header and the 107 * 32 bit eor is stored with the top 16 bits in the header and the
108 * bottom 16 bits in the trailer. logredo can easily recognize pages 108 * bottom 16 bits in the trailer. logredo can easily recognize pages
109 * that were not completed by reconstructing this eor and checking 109 * that were not completed by reconstructing this eor and checking
110 * the log page. 110 * the log page.
111 * 111 *
112 * Previous versions of the operating system did not allow split 112 * Previous versions of the operating system did not allow split
113 * writes and detected partially written records in logredo by 113 * writes and detected partially written records in logredo by
114 * ordering the updates to the header, trailer, and the move of data 114 * ordering the updates to the header, trailer, and the move of data
115 * into the logdata area. The order: (1) data is moved (2) header 115 * into the logdata area. The order: (1) data is moved (2) header
116 * is updated (3) trailer is updated. In logredo, when the header 116 * is updated (3) trailer is updated. In logredo, when the header
117 * differed from the trailer, the header and trailer were reconciled 117 * differed from the trailer, the header and trailer were reconciled
118 * as follows: if h.page != t.page they were set to the smaller of 118 * as follows: if h.page != t.page they were set to the smaller of
119 * the two and h.eor and t.eor set to 8 (i.e. empty page). if (only) 119 * the two and h.eor and t.eor set to 8 (i.e. empty page). if (only)
120 * h.eor != t.eor they were set to the smaller of their two values. 120 * h.eor != t.eor they were set to the smaller of their two values.
121 */ 121 */
122 struct logpage { 122 struct logpage {
123 struct { /* header */ 123 struct { /* header */
124 __le32 page; /* 4: log sequence page number */ 124 __le32 page; /* 4: log sequence page number */
125 __le16 rsrvd; /* 2: */ 125 __le16 rsrvd; /* 2: */
126 __le16 eor; /* 2: end-of-log offset of last record write */ 126 __le16 eor; /* 2: end-of-log offset of last record write */
127 } h; 127 } h;
128 128
129 __le32 data[LOGPSIZE / 4 - 4]; /* log record area */ 129 __le32 data[LOGPSIZE / 4 - 4]; /* log record area */
130 130
131 struct { /* trailer */ 131 struct { /* trailer */
132 __le32 page; /* 4: normally the same as h.page */ 132 __le32 page; /* 4: normally the same as h.page */
133 __le16 rsrvd; /* 2: */ 133 __le16 rsrvd; /* 2: */
134 __le16 eor; /* 2: normally the same as h.eor */ 134 __le16 eor; /* 2: normally the same as h.eor */
135 } t; 135 } t;
136 }; 136 };
137 137
138 #define LOGPHDRSIZE 8 /* log page header size */ 138 #define LOGPHDRSIZE 8 /* log page header size */
139 #define LOGPTLRSIZE 8 /* log page trailer size */ 139 #define LOGPTLRSIZE 8 /* log page trailer size */
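
The pre-split-write reconciliation rule described above is mechanical: if the header and trailer disagree on the page number, take the smaller and mark the page empty (eor = 8, i.e. LOGPHDRSIZE); if they agree on the page but not on the eor, take the smaller eor. A host-endian sketch of that rule (the on-disk fields are little-endian __le32/__le16; conversions are elided here):

    #include <stdint.h>
    #include <stdio.h>

    struct lp_ends {                /* host-endian model of h and t above */
            uint32_t page;
            uint16_t eor;
    };

    static void reconcile(struct lp_ends *h, struct lp_ends *t)
    {
            if (h->page != t->page) {       /* torn between header/trailer */
                    uint32_t p = h->page < t->page ? h->page : t->page;
                    h->page = t->page = p;
                    h->eor = t->eor = 8;    /* LOGPHDRSIZE: empty page */
            } else if (h->eor != t->eor) {  /* torn within the record area */
                    uint16_t e = h->eor < t->eor ? h->eor : t->eor;
                    h->eor = t->eor = e;
            }
    }

    int main(void)
    {
            struct lp_ends h = { 7, 200 }, t = { 6, 950 };

            reconcile(&h, &t);
            printf("page=%u eor=%u\n", h.page, h.eor);  /* page=6 eor=8 */
            return 0;
    }
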
140 140
141 141
142 /* 142 /*
143 * log record 143 * log record
144 * 144 *
145 * (this comment should be rewritten !) 145 * (this comment should be rewritten !)
146 * jfs uses only "after" log records (only a single writer is allowed 146 * jfs uses only "after" log records (only a single writer is allowed
147 * in a page, pages are written to temporary paging space if 147 * in a page, pages are written to temporary paging space if
148 * they must be written to disk before commit, and i/o is 148 * they must be written to disk before commit, and i/o is
149 * scheduled for modified pages to their home location after 149 * scheduled for modified pages to their home location after
150 * the log records containing the after values and the commit 150 * the log records containing the after values and the commit
151 * record is written to the log on disk, undo discards the copy 151 * record is written to the log on disk, undo discards the copy
152 * in main-memory.) 152 * in main-memory.)
153 * 153 *
154 * a log record consists of a data area of variable length followed by 154 * a log record consists of a data area of variable length followed by
155 * a descriptor of fixed size LOGRDSIZE bytes. 155 * a descriptor of fixed size LOGRDSIZE bytes.
156 * the data area is rounded up to an integral number of 4-bytes and 156 * the data area is rounded up to an integral number of 4-bytes and
157 * must be no longer than LOGPSIZE. 157 * must be no longer than LOGPSIZE.
158 * the descriptor is a multiple of 4 bytes in size and aligned on a 158 * the descriptor is a multiple of 4 bytes in size and aligned on a
159 * 4-byte boundary. 159 * 4-byte boundary.
160 * records are packed one after the other in the data area of log pages. 160 * records are packed one after the other in the data area of log pages.
161 * (sometimes a DUMMY record is inserted so that at least one record ends 161 * (sometimes a DUMMY record is inserted so that at least one record ends
162 * on every page or the longest record is placed on at most two pages). 162 * on every page or the longest record is placed on at most two pages).
163 * the field eor in page header/trailer points to the byte following 163 * the field eor in page header/trailer points to the byte following
164 * the last record on a page. 164 * the last record on a page.
165 */ 165 */
166 166
167 /* log record types */ 167 /* log record types */
168 #define LOG_COMMIT 0x8000 168 #define LOG_COMMIT 0x8000
169 #define LOG_SYNCPT 0x4000 169 #define LOG_SYNCPT 0x4000
170 #define LOG_MOUNT 0x2000 170 #define LOG_MOUNT 0x2000
171 #define LOG_REDOPAGE 0x0800 171 #define LOG_REDOPAGE 0x0800
172 #define LOG_NOREDOPAGE 0x0080 172 #define LOG_NOREDOPAGE 0x0080
173 #define LOG_NOREDOINOEXT 0x0040 173 #define LOG_NOREDOINOEXT 0x0040
174 #define LOG_UPDATEMAP 0x0008 174 #define LOG_UPDATEMAP 0x0008
175 #define LOG_NOREDOFILE 0x0001 175 #define LOG_NOREDOFILE 0x0001
176 176
177 /* REDOPAGE/NOREDOPAGE log record data type */ 177 /* REDOPAGE/NOREDOPAGE log record data type */
178 #define LOG_INODE 0x0001 178 #define LOG_INODE 0x0001
179 #define LOG_XTREE 0x0002 179 #define LOG_XTREE 0x0002
180 #define LOG_DTREE 0x0004 180 #define LOG_DTREE 0x0004
181 #define LOG_BTROOT 0x0010 181 #define LOG_BTROOT 0x0010
182 #define LOG_EA 0x0020 182 #define LOG_EA 0x0020
183 #define LOG_ACL 0x0040 183 #define LOG_ACL 0x0040
184 #define LOG_DATA 0x0080 184 #define LOG_DATA 0x0080
185 #define LOG_NEW 0x0100 185 #define LOG_NEW 0x0100
186 #define LOG_EXTEND 0x0200 186 #define LOG_EXTEND 0x0200
187 #define LOG_RELOCATE 0x0400 187 #define LOG_RELOCATE 0x0400
188 #define LOG_DIR_XTREE 0x0800 /* Xtree is in directory inode */ 188 #define LOG_DIR_XTREE 0x0800 /* Xtree is in directory inode */
189 189
190 /* UPDATEMAP log record descriptor type */ 190 /* UPDATEMAP log record descriptor type */
191 #define LOG_ALLOCXADLIST 0x0080 191 #define LOG_ALLOCXADLIST 0x0080
192 #define LOG_ALLOCPXDLIST 0x0040 192 #define LOG_ALLOCPXDLIST 0x0040
193 #define LOG_ALLOCXAD 0x0020 193 #define LOG_ALLOCXAD 0x0020
194 #define LOG_ALLOCPXD 0x0010 194 #define LOG_ALLOCPXD 0x0010
195 #define LOG_FREEXADLIST 0x0008 195 #define LOG_FREEXADLIST 0x0008
196 #define LOG_FREEPXDLIST 0x0004 196 #define LOG_FREEPXDLIST 0x0004
197 #define LOG_FREEXAD 0x0002 197 #define LOG_FREEXAD 0x0002
198 #define LOG_FREEPXD 0x0001 198 #define LOG_FREEPXD 0x0001
199 199
200 200
201 struct lrd { 201 struct lrd {
202 /* 202 /*
203 * type independent area 203 * type independent area
204 */ 204 */
205 __le32 logtid; /* 4: log transaction identifier */ 205 __le32 logtid; /* 4: log transaction identifier */
206 __le32 backchain; /* 4: ptr to prev record of same transaction */ 206 __le32 backchain; /* 4: ptr to prev record of same transaction */
207 __le16 type; /* 2: record type */ 207 __le16 type; /* 2: record type */
208 __le16 length; /* 2: length of data in record (in byte) */ 208 __le16 length; /* 2: length of data in record (in byte) */
209 __le32 aggregate; /* 4: file system lv/aggregate */ 209 __le32 aggregate; /* 4: file system lv/aggregate */
210 /* (16) */ 210 /* (16) */
211 211
212 /* 212 /*
213 * type dependent area (20) 213 * type dependent area (20)
214 */ 214 */
215 union { 215 union {
216 216
217 /* 217 /*
218 * COMMIT: commit 218 * COMMIT: commit
219 * 219 *
220 * transaction commit: no type-dependent information; 220 * transaction commit: no type-dependent information;
221 */ 221 */
222 222
223 /* 223 /*
224 * REDOPAGE: after-image 224 * REDOPAGE: after-image
225 * 225 *
226 * apply after-image; 226 * apply after-image;
227 * 227 *
228 * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format; 228 * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
229 */ 229 */
230 struct { 230 struct {
231 __le32 fileset; /* 4: fileset number */ 231 __le32 fileset; /* 4: fileset number */
232 __le32 inode; /* 4: inode number */ 232 __le32 inode; /* 4: inode number */
233 __le16 type; /* 2: REDOPAGE record type */ 233 __le16 type; /* 2: REDOPAGE record type */
234 __le16 l2linesize; /* 2: log2 of line size */ 234 __le16 l2linesize; /* 2: log2 of line size */
235 pxd_t pxd; /* 8: on-disk page pxd */ 235 pxd_t pxd; /* 8: on-disk page pxd */
236 } redopage; /* (20) */ 236 } redopage; /* (20) */
237 237
238 /* 238 /*
239 * NOREDOPAGE: the page is freed 239 * NOREDOPAGE: the page is freed
240 * 240 *
241 * do not apply after-image records which precede this record 241 * do not apply after-image records which precede this record
242 * in the log with the same page block number to this page. 242 * in the log with the same page block number to this page.
243 * 243 *
244 * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format; 244 * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
245 */ 245 */
246 struct { 246 struct {
247 __le32 fileset; /* 4: fileset number */ 247 __le32 fileset; /* 4: fileset number */
248 __le32 inode; /* 4: inode number */ 248 __le32 inode; /* 4: inode number */
249 __le16 type; /* 2: NOREDOPAGE record type */ 249 __le16 type; /* 2: NOREDOPAGE record type */
250 __le16 rsrvd; /* 2: reserved */ 250 __le16 rsrvd; /* 2: reserved */
251 pxd_t pxd; /* 8: on-disk page pxd */ 251 pxd_t pxd; /* 8: on-disk page pxd */
252 } noredopage; /* (20) */ 252 } noredopage; /* (20) */
253 253
254 /* 254 /*
255 * UPDATEMAP: update block allocation map 255 * UPDATEMAP: update block allocation map
256 * 256 *
257 * either in-line PXD, 257 * either in-line PXD,
258 * or out-of-line XADLIST; 258 * or out-of-line XADLIST;
259 * 259 *
260 * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format; 260 * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
261 */ 261 */
262 struct { 262 struct {
263 __le32 fileset; /* 4: fileset number */ 263 __le32 fileset; /* 4: fileset number */
264 __le32 inode; /* 4: inode number */ 264 __le32 inode; /* 4: inode number */
265 __le16 type; /* 2: UPDATEMAP record type */ 265 __le16 type; /* 2: UPDATEMAP record type */
266 __le16 nxd; /* 2: number of extents */ 266 __le16 nxd; /* 2: number of extents */
267 pxd_t pxd; /* 8: pxd */ 267 pxd_t pxd; /* 8: pxd */
268 } updatemap; /* (20) */ 268 } updatemap; /* (20) */
269 269
270 /* 270 /*
271 * NOREDOINOEXT: the inode extent is freed 271 * NOREDOINOEXT: the inode extent is freed
272 * 272 *
273 * do not apply after-image records which precede this 273 * do not apply after-image records which precede this
274 * record in the log with any of the 4 page block 274 * record in the log with any of the 4 page block
275 * numbers in this inode extent. 275 * numbers in this inode extent.
276 * 276 *
277 * NOTE: The fileset and pxd fields MUST remain in 277 * NOTE: The fileset and pxd fields MUST remain in
278 * the same fields in the REDOPAGE record format. 278 * the same fields in the REDOPAGE record format.
279 * 279 *
280 */ 280 */
281 struct { 281 struct {
282 __le32 fileset; /* 4: fileset number */ 282 __le32 fileset; /* 4: fileset number */
283 __le32 iagnum; /* 4: IAG number */ 283 __le32 iagnum; /* 4: IAG number */
284 __le32 inoext_idx; /* 4: inode extent index */ 284 __le32 inoext_idx; /* 4: inode extent index */
285 pxd_t pxd; /* 8: on-disk page pxd */ 285 pxd_t pxd; /* 8: on-disk page pxd */
286 } noredoinoext; /* (20) */ 286 } noredoinoext; /* (20) */
287 287
288 /* 288 /*
289 * SYNCPT: log sync point 289 * SYNCPT: log sync point
290 * 290 *
291 * replay log up to the syncpt address specified; 291 * replay log up to the syncpt address specified;
292 */ 292 */
293 struct { 293 struct {
294 __le32 sync; /* 4: syncpt address (0 = here) */ 294 __le32 sync; /* 4: syncpt address (0 = here) */
295 } syncpt; 295 } syncpt;
296 296
297 /* 297 /*
298 * MOUNT: file system mount 298 * MOUNT: file system mount
299 * 299 *
300 * file system mount: no type-dependent information; 300 * file system mount: no type-dependent information;
301 */ 301 */
302 302
303 /* 303 /*
304 * ? FREEXTENT: free specified extent(s) 304 * ? FREEXTENT: free specified extent(s)
305 * 305 *
306 * free specified extent(s) from block allocation map 306 * free specified extent(s) from block allocation map
307 * N.B.: nextents should be length of data/sizeof(xad_t) 307 * N.B.: nextents should be length of data/sizeof(xad_t)
308 */ 308 */
309 struct { 309 struct {
310 __le32 type; /* 4: FREEXTENT record type */ 310 __le32 type; /* 4: FREEXTENT record type */
311 __le32 nextent; /* 4: number of extents */ 311 __le32 nextent; /* 4: number of extents */
312 312
313 /* data: PXD or XAD list */ 313 /* data: PXD or XAD list */
314 } freextent; 314 } freextent;
315 315
316 /* 316 /*
317 * ? NOREDOFILE: this file is freed 317 * ? NOREDOFILE: this file is freed
318 * 318 *
319 * do not apply records which precede this record in the log 319 * do not apply records which precede this record in the log
320 * with the same inode number. 320 * with the same inode number.
321 * 321 *
322 * NOREDOFILE must be the first to be written at commit 322 * NOREDOFILE must be the first to be written at commit
323 * (last to be read in logredo()) - it prevents 323 * (last to be read in logredo()) - it prevents
324 * replay of preceding updates of all preceding generations 324 * replay of preceding updates of all preceding generations
325 * of the inumber esp. the on-disk inode itself, 325 * of the inumber esp. the on-disk inode itself,
326 * but does NOT prevent 326 * but does NOT prevent
327 * replay of the 327 * replay of the
328 */ 328 */
329 struct { 329 struct {
330 __le32 fileset; /* 4: fileset number */ 330 __le32 fileset; /* 4: fileset number */
331 __le32 inode; /* 4: inode number */ 331 __le32 inode; /* 4: inode number */
332 } noredofile; 332 } noredofile;
333 333
334 /* 334 /*
335 * ? NEWPAGE: 335 * ? NEWPAGE:
336 * 336 *
337 * metadata type dependent 337 * metadata type dependent
338 */ 338 */
339 struct { 339 struct {
340 __le32 fileset; /* 4: fileset number */ 340 __le32 fileset; /* 4: fileset number */
341 __le32 inode; /* 4: inode number */ 341 __le32 inode; /* 4: inode number */
342 __le32 type; /* 4: NEWPAGE record type */ 342 __le32 type; /* 4: NEWPAGE record type */
343 pxd_t pxd; /* 8: on-disk page pxd */ 343 pxd_t pxd; /* 8: on-disk page pxd */
344 } newpage; 344 } newpage;
345 345
346 /* 346 /*
347 * ? DUMMY: filler 347 * ? DUMMY: filler
348 * 348 *
349 * no type-dependent information 349 * no type-dependent information
350 */ 350 */
351 } log; 351 } log;
352 }; /* (36) */ 352 }; /* (36) */
353 353
354 #define LOGRDSIZE (sizeof(struct lrd)) 354 #define LOGRDSIZE (sizeof(struct lrd))
355 355
356 /* 356 /*
357 * line vector descriptor 357 * line vector descriptor
358 */ 358 */
359 struct lvd { 359 struct lvd {
360 __le16 offset; 360 __le16 offset;
361 __le16 length; 361 __le16 length;
362 }; 362 };
363 363
364 364
365 /* 365 /*
366 * log logical volume 366 * log logical volume
367 */ 367 */
368 struct jfs_log { 368 struct jfs_log {
369 369
370 struct list_head sb_list;/* This is used to sync metadata 370 struct list_head sb_list;/* This is used to sync metadata
371 * before writing syncpt. 371 * before writing syncpt.
372 */ 372 */
373 struct list_head journal_list; /* Global list */ 373 struct list_head journal_list; /* Global list */
374 struct block_device *bdev; /* 4: log lv pointer */ 374 struct block_device *bdev; /* 4: log lv pointer */
375 int serial; /* 4: log mount serial number */ 375 int serial; /* 4: log mount serial number */
376 376
377 s64 base; /* @8: log extent address (inline log) */ 377 s64 base; /* @8: log extent address (inline log) */
378 int size; /* 4: log size in log page (in page) */ 378 int size; /* 4: log size in log page (in page) */
379 int l2bsize; /* 4: log2 of bsize */ 379 int l2bsize; /* 4: log2 of bsize */
380 380
381 long flag; /* 4: flag */ 381 long flag; /* 4: flag */
382 382
383 struct lbuf *lbuf_free; /* 4: free lbufs */ 383 struct lbuf *lbuf_free; /* 4: free lbufs */
384 wait_queue_head_t free_wait; /* 4: */ 384 wait_queue_head_t free_wait; /* 4: */
385 385
386 /* log write */ 386 /* log write */
387 int logtid; /* 4: log tid */ 387 int logtid; /* 4: log tid */
388 int page; /* 4: page number of eol page */ 388 int page; /* 4: page number of eol page */
389 int eor; /* 4: eor of last record in eol page */ 389 int eor; /* 4: eor of last record in eol page */
390 struct lbuf *bp; /* 4: current log page buffer */ 390 struct lbuf *bp; /* 4: current log page buffer */
391 391
392 struct semaphore loglock; /* 4: log write serialization lock */ 392 struct semaphore loglock; /* 4: log write serialization lock */
393 393
394 /* syncpt */ 394 /* syncpt */
395 int nextsync; /* 4: bytes to write before next syncpt */ 395 int nextsync; /* 4: bytes to write before next syncpt */
396 int active; /* 4: */ 396 int active; /* 4: */
397 wait_queue_head_t syncwait; /* 4: */ 397 wait_queue_head_t syncwait; /* 4: */
398 398
399 /* commit */ 399 /* commit */
400 uint cflag; /* 4: */ 400 uint cflag; /* 4: */
401 struct list_head cqueue; /* FIFO commit queue */ 401 struct list_head cqueue; /* FIFO commit queue */
402 struct tblock *flush_tblk; /* tblk we're waiting on for flush */ 402 struct tblock *flush_tblk; /* tblk we're waiting on for flush */
403 int gcrtc; /* 4: GC_READY transaction count */ 403 int gcrtc; /* 4: GC_READY transaction count */
404 struct tblock *gclrt; /* 4: latest GC_READY transaction */ 404 struct tblock *gclrt; /* 4: latest GC_READY transaction */
405 spinlock_t gclock; /* 4: group commit lock */ 405 spinlock_t gclock; /* 4: group commit lock */
406 int logsize; /* 4: log data area size in byte */ 406 int logsize; /* 4: log data area size in byte */
407 int lsn; /* 4: end-of-log */ 407 int lsn; /* 4: end-of-log */
408 int clsn; /* 4: clsn */ 408 int clsn; /* 4: clsn */
409 int syncpt; /* 4: addr of last syncpt record */ 409 int syncpt; /* 4: addr of last syncpt record */
410 int sync; /* 4: addr from last logsync() */ 410 int sync; /* 4: addr from last logsync() */
411 struct list_head synclist; /* 8: logsynclist anchor */ 411 struct list_head synclist; /* 8: logsynclist anchor */
412 spinlock_t synclock; /* 4: synclist lock */ 412 spinlock_t synclock; /* 4: synclist lock */
413 struct lbuf *wqueue; /* 4: log pageout queue */ 413 struct lbuf *wqueue; /* 4: log pageout queue */
414 int count; /* 4: count */ 414 int count; /* 4: count */
415 char uuid[16]; /* 16: 128-bit uuid of log device */ 415 char uuid[16]; /* 16: 128-bit uuid of log device */
416 416
417 int no_integrity; /* 3: flag to disable journaling to disk */ 417 int no_integrity; /* 3: flag to disable journaling to disk */
418 }; 418 };
419 419
420 /* 420 /*
421 * Log flag 421 * Log flag
422 */ 422 */
423 #define log_INLINELOG 1 423 #define log_INLINELOG 1
424 #define log_SYNCBARRIER 2 424 #define log_SYNCBARRIER 2
425 #define log_QUIESCE 3 425 #define log_QUIESCE 3
426 #define log_FLUSH 4 426 #define log_FLUSH 4
427 427
428 /* 428 /*
429 * group commit flag 429 * group commit flag
430 */ 430 */
431 /* jfs_log */ 431 /* jfs_log */
432 #define logGC_PAGEOUT 0x00000001 432 #define logGC_PAGEOUT 0x00000001
433 433
434 /* tblock/lbuf */ 434 /* tblock/lbuf */
435 #define tblkGC_QUEUE 0x0001 435 #define tblkGC_QUEUE 0x0001
436 #define tblkGC_READY 0x0002 436 #define tblkGC_READY 0x0002
437 #define tblkGC_COMMIT 0x0004 437 #define tblkGC_COMMIT 0x0004
438 #define tblkGC_COMMITTED 0x0008 438 #define tblkGC_COMMITTED 0x0008
439 #define tblkGC_EOP 0x0010 439 #define tblkGC_EOP 0x0010
440 #define tblkGC_FREE 0x0020 440 #define tblkGC_FREE 0x0020
441 #define tblkGC_LEADER 0x0040 441 #define tblkGC_LEADER 0x0040
442 #define tblkGC_ERROR 0x0080 442 #define tblkGC_ERROR 0x0080
443 #define tblkGC_LAZY 0x0100 // D230860 443 #define tblkGC_LAZY 0x0100 // D230860
444 #define tblkGC_UNLOCKED 0x0200 // D230860 444 #define tblkGC_UNLOCKED 0x0200 // D230860
445 445
446 /* 446 /*
447 * log cache buffer header 447 * log cache buffer header
448 */ 448 */
449 struct lbuf { 449 struct lbuf {
450 struct jfs_log *l_log; /* 4: log associated with buffer */ 450 struct jfs_log *l_log; /* 4: log associated with buffer */
451 451
452 /* 452 /*
453 * data buffer base area 453 * data buffer base area
454 */ 454 */
455 uint l_flag; /* 4: pageout control flags */ 455 uint l_flag; /* 4: pageout control flags */
456 456
457 struct lbuf *l_wqnext; /* 4: write queue link */ 457 struct lbuf *l_wqnext; /* 4: write queue link */
458 struct lbuf *l_freelist; /* 4: freelist link */ 458 struct lbuf *l_freelist; /* 4: freelist link */
459 459
460 int l_pn; /* 4: log page number */ 460 int l_pn; /* 4: log page number */
461 int l_eor; /* 4: log record eor */ 461 int l_eor; /* 4: log record eor */
462 int l_ceor; /* 4: committed log record eor */ 462 int l_ceor; /* 4: committed log record eor */
463 463
464 s64 l_blkno; /* 8: log page block number */ 464 s64 l_blkno; /* 8: log page block number */
465 caddr_t l_ldata; /* 4: data page */ 465 caddr_t l_ldata; /* 4: data page */
466 struct page *l_page; /* The page itself */ 466 struct page *l_page; /* The page itself */
467 uint l_offset; /* Offset of l_ldata within the page */ 467 uint l_offset; /* Offset of l_ldata within the page */
468 468
469 wait_queue_head_t l_ioevent; /* 4: i/o done event */ 469 wait_queue_head_t l_ioevent; /* 4: i/o done event */
470 }; 470 };
471 471
472 /* Reuse l_freelist for redrive list */ 472 /* Reuse l_freelist for redrive list */
473 #define l_redrive_next l_freelist 473 #define l_redrive_next l_freelist
474 474
475 /* 475 /*
476 * logsynclist block 476 * logsynclist block
477 * 477 *
478 * common logsyncblk prefix for jbuf_t and tblock 478 * common logsyncblk prefix for jbuf_t and tblock
479 */ 479 */
480 struct logsyncblk { 480 struct logsyncblk {
481 u16 xflag; /* flags */ 481 u16 xflag; /* flags */
482 u16 flag; /* only meaningful in tblock */ 482 u16 flag; /* only meaningful in tblock */
483 lid_t lid; /* lock id */ 483 lid_t lid; /* lock id */
484 s32 lsn; /* log sequence number */ 484 s32 lsn; /* log sequence number */
485 struct list_head synclist; /* log sync list link */ 485 struct list_head synclist; /* log sync list link */
486 }; 486 };
487 487
488 /* 488 /*
489 * logsynclist serialization (per log) 489 * logsynclist serialization (per log)
490 */ 490 */
491 491
492 #define LOGSYNC_LOCK_INIT(log) spin_lock_init(&(log)->synclock) 492 #define LOGSYNC_LOCK_INIT(log) spin_lock_init(&(log)->synclock)
493 #define LOGSYNC_LOCK(log, flags) spin_lock_irqsave(&(log)->synclock, flags) 493 #define LOGSYNC_LOCK(log, flags) spin_lock_irqsave(&(log)->synclock, flags)
494 #define LOGSYNC_UNLOCK(log, flags) \ 494 #define LOGSYNC_UNLOCK(log, flags) \
495 spin_unlock_irqrestore(&(log)->synclock, flags) 495 spin_unlock_irqrestore(&(log)->synclock, flags)
496 496
497 /* compute the difference in bytes of lsn from sync point */ 497 /* compute the difference in bytes of lsn from sync point */
498 #define logdiff(diff, lsn, log)\ 498 #define logdiff(diff, lsn, log)\
499 {\ 499 {\
500 diff = (lsn) - (log)->syncpt;\ 500 diff = (lsn) - (log)->syncpt;\
501 if (diff < 0)\ 501 if (diff < 0)\
502 diff += (log)->logsize;\ 502 diff += (log)->logsize;\
503 } 503 }
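
logdiff() yields the forward byte distance from the last sync point to an lsn on the circular log: a raw difference that comes out negative means the lsn has wrapped, so one full logsize is added back. The same computation as a function, with sample values:

    #include <stdio.h>

    static int logdiff(int lsn, int syncpt, int logsize)
    {
            int diff = lsn - syncpt;

            if (diff < 0)
                    diff += logsize;        /* lsn wrapped past log end */
            return diff;
    }

    int main(void)
    {
            int logsize = 1 << 20;          /* assumed 1 MiB of log data */

            printf("%d\n", logdiff(6000, 4000, logsize));    /* 2000 */
            printf("%d\n", logdiff(1000, 1048000, logsize)); /* 1576 */
            return 0;
    }
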
504 504
505 extern int lmLogOpen(struct super_block *sb); 505 extern int lmLogOpen(struct super_block *sb);
506 extern int lmLogClose(struct super_block *sb); 506 extern int lmLogClose(struct super_block *sb);
507 extern int lmLogShutdown(struct jfs_log * log); 507 extern int lmLogShutdown(struct jfs_log * log);
508 extern int lmLogInit(struct jfs_log * log); 508 extern int lmLogInit(struct jfs_log * log);
509 extern int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize); 509 extern int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize);
510 extern int lmGroupCommit(struct jfs_log *, struct tblock *); 510 extern int lmGroupCommit(struct jfs_log *, struct tblock *);
511 extern int jfsIOWait(void *); 511 extern int jfsIOWait(void *);
512 extern void jfs_flush_journal(struct jfs_log * log, int wait); 512 extern void jfs_flush_journal(struct jfs_log * log, int wait);
513 extern void jfs_syncpt(struct jfs_log *log); 513 extern void jfs_syncpt(struct jfs_log *log, int hard_sync);
514 514
515 #endif /* _H_JFS_LOGMGR */ 515 #endif /* _H_JFS_LOGMGR */
516 516
1 /* 1 /*
2 * Copyright (C) International Business Machines Corp., 2000-2005 2 * Copyright (C) International Business Machines Corp., 2000-2005
3 * Portions Copyright (C) Christoph Hellwig, 2001-2002 3 * Portions Copyright (C) Christoph Hellwig, 2001-2002
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or 7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version. 8 * (at your option) any later version.
9 * 9 *
10 * This program is distributed in the hope that it will be useful, 10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
13 * the GNU General Public License for more details. 13 * the GNU General Public License for more details.
14 * 14 *
15 * You should have received a copy of the GNU General Public License 15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software 16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */ 18 */
19 19
20 /* 20 /*
21 * jfs_txnmgr.c: transaction manager 21 * jfs_txnmgr.c: transaction manager
22 * 22 *
23 * notes: 23 * notes:
24 * transaction starts with txBegin() and ends with txCommit() 24 * transaction starts with txBegin() and ends with txCommit()
25 * or txAbort(). 25 * or txAbort().
26 * 26 *
27 * tlock is acquired at the time of update; 27 * tlock is acquired at the time of update;
28 * (obviate scan at commit time for xtree and dtree) 28 * (obviate scan at commit time for xtree and dtree)
29 * tlock and mp point to each other; 29 * tlock and mp point to each other;
30 * (no hashlist for mp -> tlock). 30 * (no hashlist for mp -> tlock).
31 * 31 *
32 * special cases: 32 * special cases:
33 * tlock on in-memory inode: 33 * tlock on in-memory inode:
34 * in-place tlock in the in-memory inode itself; 34 * in-place tlock in the in-memory inode itself;
35 * converted to page lock by iWrite() at commit time. 35 * converted to page lock by iWrite() at commit time.
36 * 36 *
37 * tlock during write()/mmap() under anonymous transaction (tid = 0): 37 * tlock during write()/mmap() under anonymous transaction (tid = 0):
38 * transferred (?) to transaction at commit time. 38 * transferred (?) to transaction at commit time.
39 * 39 *
40 * use the page itself to update allocation maps 40 * use the page itself to update allocation maps
41 * (obviate intermediate replication of allocation/deallocation data) 41 * (obviate intermediate replication of allocation/deallocation data)
42 * hold on to mp+lock thru update of maps 42 * hold on to mp+lock thru update of maps
43 */ 43 */
44 44
45 #include <linux/fs.h> 45 #include <linux/fs.h>
46 #include <linux/vmalloc.h> 46 #include <linux/vmalloc.h>
47 #include <linux/smp_lock.h> 47 #include <linux/smp_lock.h>
48 #include <linux/completion.h> 48 #include <linux/completion.h>
49 #include <linux/suspend.h> 49 #include <linux/suspend.h>
50 #include <linux/module.h> 50 #include <linux/module.h>
51 #include <linux/moduleparam.h> 51 #include <linux/moduleparam.h>
52 #include "jfs_incore.h" 52 #include "jfs_incore.h"
53 #include "jfs_inode.h" 53 #include "jfs_inode.h"
54 #include "jfs_filsys.h" 54 #include "jfs_filsys.h"
55 #include "jfs_metapage.h" 55 #include "jfs_metapage.h"
56 #include "jfs_dinode.h" 56 #include "jfs_dinode.h"
57 #include "jfs_imap.h" 57 #include "jfs_imap.h"
58 #include "jfs_dmap.h" 58 #include "jfs_dmap.h"
59 #include "jfs_superblock.h" 59 #include "jfs_superblock.h"
60 #include "jfs_debug.h" 60 #include "jfs_debug.h"
61 61
62 /* 62 /*
63 * transaction management structures 63 * transaction management structures
64 */ 64 */
65 static struct { 65 static struct {
66 int freetid; /* index of a free tid structure */ 66 int freetid; /* index of a free tid structure */
67 int freelock; /* index first free lock word */ 67 int freelock; /* index first free lock word */
68 wait_queue_head_t freewait; /* eventlist of free tblock */ 68 wait_queue_head_t freewait; /* eventlist of free tblock */
69 wait_queue_head_t freelockwait; /* eventlist of free tlock */ 69 wait_queue_head_t freelockwait; /* eventlist of free tlock */
70 wait_queue_head_t lowlockwait; /* eventlist of ample tlocks */ 70 wait_queue_head_t lowlockwait; /* eventlist of ample tlocks */
71 int tlocksInUse; /* Number of tlocks in use */ 71 int tlocksInUse; /* Number of tlocks in use */
72 spinlock_t LazyLock; /* synchronize sync_queue & unlock_queue */ 72 spinlock_t LazyLock; /* synchronize sync_queue & unlock_queue */
73 /* struct tblock *sync_queue; * Transactions waiting for data sync */ 73 /* struct tblock *sync_queue; * Transactions waiting for data sync */
74 struct list_head unlock_queue; /* Txns waiting to be released */ 74 struct list_head unlock_queue; /* Txns waiting to be released */
75 struct list_head anon_list; /* inodes having anonymous txns */ 75 struct list_head anon_list; /* inodes having anonymous txns */
76 struct list_head anon_list2; /* inodes having anonymous txns 76 struct list_head anon_list2; /* inodes having anonymous txns
77 that couldn't be sync'ed */ 77 that couldn't be sync'ed */
78 } TxAnchor; 78 } TxAnchor;
79 79
80 int jfs_tlocks_low; /* Indicates low number of available tlocks */ 80 int jfs_tlocks_low; /* Indicates low number of available tlocks */
81 81
82 #ifdef CONFIG_JFS_STATISTICS 82 #ifdef CONFIG_JFS_STATISTICS
83 static struct { 83 static struct {
84 uint txBegin; 84 uint txBegin;
85 uint txBegin_barrier; 85 uint txBegin_barrier;
86 uint txBegin_lockslow; 86 uint txBegin_lockslow;
87 uint txBegin_freetid; 87 uint txBegin_freetid;
88 uint txBeginAnon; 88 uint txBeginAnon;
89 uint txBeginAnon_barrier; 89 uint txBeginAnon_barrier;
90 uint txBeginAnon_lockslow; 90 uint txBeginAnon_lockslow;
91 uint txLockAlloc; 91 uint txLockAlloc;
92 uint txLockAlloc_freelock; 92 uint txLockAlloc_freelock;
93 } TxStat; 93 } TxStat;
94 #endif 94 #endif
95 95
96 static int nTxBlock = -1; /* number of transaction blocks */ 96 static int nTxBlock = -1; /* number of transaction blocks */
97 module_param(nTxBlock, int, 0); 97 module_param(nTxBlock, int, 0);
98 MODULE_PARM_DESC(nTxBlock, 98 MODULE_PARM_DESC(nTxBlock,
99 "Number of transaction blocks (max:65536)"); 99 "Number of transaction blocks (max:65536)");
100 100
101 static int nTxLock = -1; /* number of transaction locks */ 101 static int nTxLock = -1; /* number of transaction locks */
102 module_param(nTxLock, int, 0); 102 module_param(nTxLock, int, 0);
103 MODULE_PARM_DESC(nTxLock, 103 MODULE_PARM_DESC(nTxLock,
104 "Number of transaction locks (max:65536)"); 104 "Number of transaction locks (max:65536)");
105 105
106 struct tblock *TxBlock; /* transaction block table */ 106 struct tblock *TxBlock; /* transaction block table */
107 static int TxLockLWM; /* Low water mark for number of txLocks used */ 107 static int TxLockLWM; /* Low water mark for number of txLocks used */
108 static int TxLockHWM; /* High water mark for number of txLocks used */ 108 static int TxLockHWM; /* High water mark for number of txLocks used */
109 static int TxLockVHWM; /* Very High water mark */ 109 static int TxLockVHWM; /* Very High water mark */
110 struct tlock *TxLock; /* transaction lock table */ 110 struct tlock *TxLock; /* transaction lock table */
111 111
112 /* 112 /*
113 * transaction management lock 113 * transaction management lock
114 */ 114 */
115 static DEFINE_SPINLOCK(jfsTxnLock); 115 static DEFINE_SPINLOCK(jfsTxnLock);
116 116
117 #define TXN_LOCK() spin_lock(&jfsTxnLock) 117 #define TXN_LOCK() spin_lock(&jfsTxnLock)
118 #define TXN_UNLOCK() spin_unlock(&jfsTxnLock) 118 #define TXN_UNLOCK() spin_unlock(&jfsTxnLock)
119 119
120 #define LAZY_LOCK_INIT() spin_lock_init(&TxAnchor.LazyLock); 120 #define LAZY_LOCK_INIT() spin_lock_init(&TxAnchor.LazyLock);
121 #define LAZY_LOCK(flags) spin_lock_irqsave(&TxAnchor.LazyLock, flags) 121 #define LAZY_LOCK(flags) spin_lock_irqsave(&TxAnchor.LazyLock, flags)
122 #define LAZY_UNLOCK(flags) spin_unlock_irqrestore(&TxAnchor.LazyLock, flags) 122 #define LAZY_UNLOCK(flags) spin_unlock_irqrestore(&TxAnchor.LazyLock, flags)
123 123
124 DECLARE_WAIT_QUEUE_HEAD(jfs_sync_thread_wait); 124 DECLARE_WAIT_QUEUE_HEAD(jfs_sync_thread_wait);
125 DECLARE_WAIT_QUEUE_HEAD(jfs_commit_thread_wait); 125 DECLARE_WAIT_QUEUE_HEAD(jfs_commit_thread_wait);
126 static int jfs_commit_thread_waking; 126 static int jfs_commit_thread_waking;
127 127
128 /* 128 /*
129 * Retry logic exists outside these macros to protect from spurious wakeups. 129 * Retry logic exists outside these macros to protect from spurious wakeups.
130 */ 130 */
131 static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event) 131 static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event)
132 { 132 {
133 DECLARE_WAITQUEUE(wait, current); 133 DECLARE_WAITQUEUE(wait, current);
134 134
135 add_wait_queue(event, &wait); 135 add_wait_queue(event, &wait);
136 set_current_state(TASK_UNINTERRUPTIBLE); 136 set_current_state(TASK_UNINTERRUPTIBLE);
137 TXN_UNLOCK(); 137 TXN_UNLOCK();
138 schedule(); 138 schedule();
139 current->state = TASK_RUNNING; 139 current->state = TASK_RUNNING;
140 remove_wait_queue(event, &wait); 140 remove_wait_queue(event, &wait);
141 } 141 }
142 142
143 #define TXN_SLEEP(event)\ 143 #define TXN_SLEEP(event)\
144 {\ 144 {\
145 TXN_SLEEP_DROP_LOCK(event);\ 145 TXN_SLEEP_DROP_LOCK(event);\
146 TXN_LOCK();\ 146 TXN_LOCK();\
147 } 147 }
148 148
149 #define TXN_WAKEUP(event) wake_up_all(event) 149 #define TXN_WAKEUP(event) wake_up_all(event)
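
As the comment above says, protection against spurious wakeups lives at the call sites rather than in the sleep macro itself. The canonical caller shape, exactly as txLockAlloc() below uses it:

/* Re-test the wait condition in a loop: a spurious wakeup simply
 * sleeps again. TXN_LOCK is held on entry, dropped across
 * schedule(), and re-acquired by TXN_SLEEP before the re-test. */
while (!(lid = TxAnchor.freelock))
	TXN_SLEEP(&TxAnchor.freelockwait);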
150 150
151 /* 151 /*
152 * statistics 152 * statistics
153 */ 153 */
154 static struct { 154 static struct {
155 tid_t maxtid; /* 4: biggest tid ever used */ 155 tid_t maxtid; /* 4: biggest tid ever used */
156 lid_t maxlid; /* 4: biggest lid ever used */ 156 lid_t maxlid; /* 4: biggest lid ever used */
157 int ntid; /* 4: # of transactions performed */ 157 int ntid; /* 4: # of transactions performed */
158 int nlid; /* 4: # of tlocks acquired */ 158 int nlid; /* 4: # of tlocks acquired */
159 int waitlock; /* 4: # of tlock wait */ 159 int waitlock; /* 4: # of tlock wait */
160 } stattx; 160 } stattx;
161 161
162 /* 162 /*
163 * forward references 163 * forward references
164 */ 164 */
165 static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, 165 static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
166 struct tlock * tlck, struct commit * cd); 166 struct tlock * tlck, struct commit * cd);
167 static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, 167 static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
168 struct tlock * tlck); 168 struct tlock * tlck);
169 static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, 169 static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
170 struct tlock * tlck); 170 struct tlock * tlck);
171 static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, 171 static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
172 struct tlock * tlck); 172 struct tlock * tlck);
173 static void txAllocPMap(struct inode *ip, struct maplock * maplock, 173 static void txAllocPMap(struct inode *ip, struct maplock * maplock,
174 struct tblock * tblk); 174 struct tblock * tblk);
175 static void txForce(struct tblock * tblk); 175 static void txForce(struct tblock * tblk);
176 static int txLog(struct jfs_log * log, struct tblock * tblk, 176 static int txLog(struct jfs_log * log, struct tblock * tblk,
177 struct commit * cd); 177 struct commit * cd);
178 static void txUpdateMap(struct tblock * tblk); 178 static void txUpdateMap(struct tblock * tblk);
179 static void txRelease(struct tblock * tblk); 179 static void txRelease(struct tblock * tblk);
180 static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, 180 static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
181 struct tlock * tlck); 181 struct tlock * tlck);
182 static void LogSyncRelease(struct metapage * mp); 182 static void LogSyncRelease(struct metapage * mp);
183 183
184 /* 184 /*
185 * transaction block/lock management 185 * transaction block/lock management
186 * --------------------------------- 186 * ---------------------------------
187 */ 187 */
188 188
189 /* 189 /*
190 * Get a transaction lock from the free list. If the number in use is 190 * Get a transaction lock from the free list. If the number in use is
191 * greater than the high water mark, wake up the sync daemon. This should 191 * greater than the high water mark, wake up the sync daemon. This should
192 * free some anonymous transaction locks. (TXN_LOCK must be held.) 192 * free some anonymous transaction locks. (TXN_LOCK must be held.)
193 */ 193 */
194 static lid_t txLockAlloc(void) 194 static lid_t txLockAlloc(void)
195 { 195 {
196 lid_t lid; 196 lid_t lid;
197 197
198 INCREMENT(TxStat.txLockAlloc); 198 INCREMENT(TxStat.txLockAlloc);
199 if (!TxAnchor.freelock) { 199 if (!TxAnchor.freelock) {
200 INCREMENT(TxStat.txLockAlloc_freelock); 200 INCREMENT(TxStat.txLockAlloc_freelock);
201 } 201 }
202 202
203 while (!(lid = TxAnchor.freelock)) 203 while (!(lid = TxAnchor.freelock))
204 TXN_SLEEP(&TxAnchor.freelockwait); 204 TXN_SLEEP(&TxAnchor.freelockwait);
205 TxAnchor.freelock = TxLock[lid].next; 205 TxAnchor.freelock = TxLock[lid].next;
206 HIGHWATERMARK(stattx.maxlid, lid); 206 HIGHWATERMARK(stattx.maxlid, lid);
207 if ((++TxAnchor.tlocksInUse > TxLockHWM) && (jfs_tlocks_low == 0)) { 207 if ((++TxAnchor.tlocksInUse > TxLockHWM) && (jfs_tlocks_low == 0)) {
208 jfs_info("txLockAlloc tlocks low"); 208 jfs_info("txLockAlloc tlocks low");
209 jfs_tlocks_low = 1; 209 jfs_tlocks_low = 1;
210 wake_up(&jfs_sync_thread_wait); 210 wake_up(&jfs_sync_thread_wait);
211 } 211 }
212 212
213 return lid; 213 return lid;
214 } 214 }
215 215
216 static void txLockFree(lid_t lid) 216 static void txLockFree(lid_t lid)
217 { 217 {
218 TxLock[lid].tid = 0; 218 TxLock[lid].tid = 0;
219 TxLock[lid].next = TxAnchor.freelock; 219 TxLock[lid].next = TxAnchor.freelock;
220 TxAnchor.freelock = lid; 220 TxAnchor.freelock = lid;
221 TxAnchor.tlocksInUse--; 221 TxAnchor.tlocksInUse--;
222 if (jfs_tlocks_low && (TxAnchor.tlocksInUse < TxLockLWM)) { 222 if (jfs_tlocks_low && (TxAnchor.tlocksInUse < TxLockLWM)) {
223 jfs_info("txLockFree jfs_tlocks_low no more"); 223 jfs_info("txLockFree jfs_tlocks_low no more");
224 jfs_tlocks_low = 0; 224 jfs_tlocks_low = 0;
225 TXN_WAKEUP(&TxAnchor.lowlockwait); 225 TXN_WAKEUP(&TxAnchor.lowlockwait);
226 } 226 }
227 TXN_WAKEUP(&TxAnchor.freelockwait); 227 TXN_WAKEUP(&TxAnchor.freelockwait);
228 } 228 }
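
txLockAlloc() and txLockFree() together implement hysteresis around the tlock watermarks: jfs_tlocks_low is raised once when usage crosses TxLockHWM and cleared only when it falls back below TxLockLWM, so the sync thread is not woken on every allocation near the threshold. A standalone sketch of that state machine (watermark values arbitrary):

#include <stdio.h>

static int tlocks_low;	/* stands in for jfs_tlocks_low */

static void alloc_one(int *in_use, int hwm)
{
	if (++(*in_use) > hwm && !tlocks_low)
		tlocks_low = 1;	/* kernel: wake_up(&jfs_sync_thread_wait) */
}

static void free_one(int *in_use, int lwm)
{
	if (--(*in_use) < lwm && tlocks_low)
		tlocks_low = 0;	/* kernel: TXN_WAKEUP(&TxAnchor.lowlockwait) */
}

int main(void)
{
	int in_use = 0, i;

	for (i = 0; i < 80; i++)
		alloc_one(&in_use, 70);
	printf("after 80 allocs (HWM 70): low=%d\n", tlocks_low);	/* 1 */
	while (in_use > 30)
		free_one(&in_use, 40);
	printf("after draining to 30 (LWM 40): low=%d\n", tlocks_low);	/* 0 */
	return 0;
}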
229 229
230 /* 230 /*
231 * NAME: txInit() 231 * NAME: txInit()
232 * 232 *
233 * FUNCTION: initialize transaction management structures 233 * FUNCTION: initialize transaction management structures
234 * 234 *
235 * RETURN: 235 * RETURN:
236 * 236 *
237 * serialization: single thread at jfs_init() 237 * serialization: single thread at jfs_init()
238 */ 238 */
239 int txInit(void) 239 int txInit(void)
240 { 240 {
241 int k, size; 241 int k, size;
242 struct sysinfo si; 242 struct sysinfo si;
243 243
244 /* Set defaults for nTxLock and nTxBlock if unset */ 244 /* Set defaults for nTxLock and nTxBlock if unset */
245 245
246 if (nTxLock == -1) { 246 if (nTxLock == -1) {
247 if (nTxBlock == -1) { 247 if (nTxBlock == -1) {
248 /* Base default on memory size */ 248 /* Base default on memory size */
249 si_meminfo(&si); 249 si_meminfo(&si);
250 if (si.totalram > (256 * 1024)) /* 1 GB */ 250 if (si.totalram > (256 * 1024)) /* 1 GB */
251 nTxLock = 64 * 1024; 251 nTxLock = 64 * 1024;
252 else 252 else
253 nTxLock = si.totalram >> 2; 253 nTxLock = si.totalram >> 2;
254 } else if (nTxBlock > (8 * 1024)) 254 } else if (nTxBlock > (8 * 1024))
255 nTxLock = 64 * 1024; 255 nTxLock = 64 * 1024;
256 else 256 else
257 nTxLock = nTxBlock << 3; 257 nTxLock = nTxBlock << 3;
258 } 258 }
259 if (nTxBlock == -1) 259 if (nTxBlock == -1)
260 nTxBlock = nTxLock >> 3; 260 nTxBlock = nTxLock >> 3;
261 261
262 /* Verify tunable parameters */ 262 /* Verify tunable parameters */
263 if (nTxBlock < 16) 263 if (nTxBlock < 16)
264 nTxBlock = 16; /* No one should set it this low */ 264 nTxBlock = 16; /* No one should set it this low */
265 if (nTxBlock > 65536) 265 if (nTxBlock > 65536)
266 nTxBlock = 65536; 266 nTxBlock = 65536;
267 if (nTxLock < 256) 267 if (nTxLock < 256)
268 nTxLock = 256; /* No one should set it this low */ 268 nTxLock = 256; /* No one should set it this low */
269 if (nTxLock > 65536) 269 if (nTxLock > 65536)
270 nTxLock = 65536; 270 nTxLock = 65536;
271 271
272 printk(KERN_INFO "JFS: nTxBlock = %d, nTxLock = %d\n", 272 printk(KERN_INFO "JFS: nTxBlock = %d, nTxLock = %d\n",
273 nTxBlock, nTxLock); 273 nTxBlock, nTxLock);
274 /* 274 /*
275 * initialize transaction block (tblock) table 275 * initialize transaction block (tblock) table
276 * 276 *
277 * transaction id (tid) = tblock index 277 * transaction id (tid) = tblock index
278 * tid = 0 is reserved. 278 * tid = 0 is reserved.
279 */ 279 */
280 TxLockLWM = (nTxLock * 4) / 10; 280 TxLockLWM = (nTxLock * 4) / 10;
281 TxLockHWM = (nTxLock * 7) / 10; 281 TxLockHWM = (nTxLock * 7) / 10;
282 TxLockVHWM = (nTxLock * 8) / 10; 282 TxLockVHWM = (nTxLock * 8) / 10;
283 283
284 size = sizeof(struct tblock) * nTxBlock; 284 size = sizeof(struct tblock) * nTxBlock;
285 TxBlock = (struct tblock *) vmalloc(size); 285 TxBlock = (struct tblock *) vmalloc(size);
286 if (TxBlock == NULL) 286 if (TxBlock == NULL)
287 return -ENOMEM; 287 return -ENOMEM;
288 288
289 for (k = 1; k < nTxBlock - 1; k++) { 289 for (k = 1; k < nTxBlock - 1; k++) {
290 TxBlock[k].next = k + 1; 290 TxBlock[k].next = k + 1;
291 init_waitqueue_head(&TxBlock[k].gcwait); 291 init_waitqueue_head(&TxBlock[k].gcwait);
292 init_waitqueue_head(&TxBlock[k].waitor); 292 init_waitqueue_head(&TxBlock[k].waitor);
293 } 293 }
294 TxBlock[k].next = 0; 294 TxBlock[k].next = 0;
295 init_waitqueue_head(&TxBlock[k].gcwait); 295 init_waitqueue_head(&TxBlock[k].gcwait);
296 init_waitqueue_head(&TxBlock[k].waitor); 296 init_waitqueue_head(&TxBlock[k].waitor);
297 297
298 TxAnchor.freetid = 1; 298 TxAnchor.freetid = 1;
299 init_waitqueue_head(&TxAnchor.freewait); 299 init_waitqueue_head(&TxAnchor.freewait);
300 300
301 stattx.maxtid = 1; /* statistics */ 301 stattx.maxtid = 1; /* statistics */
302 302
303 /* 303 /*
304 * initialize transaction lock (tlock) table 304 * initialize transaction lock (tlock) table
305 * 305 *
306 * transaction lock id = tlock index 306 * transaction lock id = tlock index
307 * tlock id = 0 is reserved. 307 * tlock id = 0 is reserved.
308 */ 308 */
309 size = sizeof(struct tlock) * nTxLock; 309 size = sizeof(struct tlock) * nTxLock;
310 TxLock = (struct tlock *) vmalloc(size); 310 TxLock = (struct tlock *) vmalloc(size);
311 if (TxLock == NULL) { 311 if (TxLock == NULL) {
312 vfree(TxBlock); 312 vfree(TxBlock);
313 return -ENOMEM; 313 return -ENOMEM;
314 } 314 }
315 315
316 /* initialize tlock table */ 316 /* initialize tlock table */
317 for (k = 1; k < nTxLock - 1; k++) 317 for (k = 1; k < nTxLock - 1; k++)
318 TxLock[k].next = k + 1; 318 TxLock[k].next = k + 1;
319 TxLock[k].next = 0; 319 TxLock[k].next = 0;
320 init_waitqueue_head(&TxAnchor.freelockwait); 320 init_waitqueue_head(&TxAnchor.freelockwait);
321 init_waitqueue_head(&TxAnchor.lowlockwait); 321 init_waitqueue_head(&TxAnchor.lowlockwait);
322 322
323 TxAnchor.freelock = 1; 323 TxAnchor.freelock = 1;
324 TxAnchor.tlocksInUse = 0; 324 TxAnchor.tlocksInUse = 0;
325 INIT_LIST_HEAD(&TxAnchor.anon_list); 325 INIT_LIST_HEAD(&TxAnchor.anon_list);
326 INIT_LIST_HEAD(&TxAnchor.anon_list2); 326 INIT_LIST_HEAD(&TxAnchor.anon_list2);
327 327
328 LAZY_LOCK_INIT(); 328 LAZY_LOCK_INIT();
329 INIT_LIST_HEAD(&TxAnchor.unlock_queue); 329 INIT_LIST_HEAD(&TxAnchor.unlock_queue);
330 330
331 stattx.maxlid = 1; /* statistics */ 331 stattx.maxlid = 1; /* statistics */
332 332
333 return 0; 333 return 0;
334 } 334 }
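
For concreteness, the defaults txInit() computes on a machine with more than 1 GB of RAM (si.totalram above 256 * 1024 pages), reproduced as a standalone calculation:

#include <stdio.h>

int main(void)
{
	/* defaults chosen by txInit() when memory exceeds 1 GB */
	int nTxLock = 64 * 1024;	/* 65536 */
	int nTxBlock = nTxLock >> 3;	/* 8192 */

	printf("nTxBlock=%d LWM=%d HWM=%d VHWM=%d\n",
	       nTxBlock,
	       (nTxLock * 4) / 10,	/* 26214 */
	       (nTxLock * 7) / 10,	/* 45875 */
	       (nTxLock * 8) / 10);	/* 52428 */
	return 0;
}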
335 335
336 /* 336 /*
337 * NAME: txExit() 337 * NAME: txExit()
338 * 338 *
339 * FUNCTION: clean up when module is unloaded 339 * FUNCTION: clean up when module is unloaded
340 */ 340 */
341 void txExit(void) 341 void txExit(void)
342 { 342 {
343 vfree(TxLock); 343 vfree(TxLock);
344 TxLock = NULL; 344 TxLock = NULL;
345 vfree(TxBlock); 345 vfree(TxBlock);
346 TxBlock = NULL; 346 TxBlock = NULL;
347 } 347 }
348 348
349 /* 349 /*
350 * NAME: txBegin() 350 * NAME: txBegin()
351 * 351 *
352 * FUNCTION: start a transaction. 352 * FUNCTION: start a transaction.
353 * 353 *
354 * PARAMETER: sb - superblock 354 * PARAMETER: sb - superblock
355 * flag - force for nested tx; 355 * flag - force for nested tx;
356 * 356 *
357 * RETURN: tid - transaction id 357 * RETURN: tid - transaction id
358 * 358 *
359 * note: flag force allows to start tx for nested tx 359 * note: flag force allows to start tx for nested tx
360 * to prevent deadlock on logsync barrier; 360 * to prevent deadlock on logsync barrier;
361 */ 361 */
362 tid_t txBegin(struct super_block *sb, int flag) 362 tid_t txBegin(struct super_block *sb, int flag)
363 { 363 {
364 tid_t t; 364 tid_t t;
365 struct tblock *tblk; 365 struct tblock *tblk;
366 struct jfs_log *log; 366 struct jfs_log *log;
367 367
368 jfs_info("txBegin: flag = 0x%x", flag); 368 jfs_info("txBegin: flag = 0x%x", flag);
369 log = JFS_SBI(sb)->log; 369 log = JFS_SBI(sb)->log;
370 370
371 TXN_LOCK(); 371 TXN_LOCK();
372 372
373 INCREMENT(TxStat.txBegin); 373 INCREMENT(TxStat.txBegin);
374 374
375 retry: 375 retry:
376 if (!(flag & COMMIT_FORCE)) { 376 if (!(flag & COMMIT_FORCE)) {
377 /* 377 /*
378 * synchronize with logsync barrier 378 * synchronize with logsync barrier
379 */ 379 */
380 if (test_bit(log_SYNCBARRIER, &log->flag) || 380 if (test_bit(log_SYNCBARRIER, &log->flag) ||
381 test_bit(log_QUIESCE, &log->flag)) { 381 test_bit(log_QUIESCE, &log->flag)) {
382 INCREMENT(TxStat.txBegin_barrier); 382 INCREMENT(TxStat.txBegin_barrier);
383 TXN_SLEEP(&log->syncwait); 383 TXN_SLEEP(&log->syncwait);
384 goto retry; 384 goto retry;
385 } 385 }
386 } 386 }
387 if (flag == 0) { 387 if (flag == 0) {
388 /* 388 /*
389 * Don't begin transaction if we're getting starved for tlocks 389 * Don't begin transaction if we're getting starved for tlocks
390 * unless COMMIT_FORCE or COMMIT_INODE (which may ultimately 390 * unless COMMIT_FORCE or COMMIT_INODE (which may ultimately
391 * free tlocks) 391 * free tlocks)
392 */ 392 */
393 if (TxAnchor.tlocksInUse > TxLockVHWM) { 393 if (TxAnchor.tlocksInUse > TxLockVHWM) {
394 INCREMENT(TxStat.txBegin_lockslow); 394 INCREMENT(TxStat.txBegin_lockslow);
395 TXN_SLEEP(&TxAnchor.lowlockwait); 395 TXN_SLEEP(&TxAnchor.lowlockwait);
396 goto retry; 396 goto retry;
397 } 397 }
398 } 398 }
399 399
400 /* 400 /*
401 * allocate transaction id/block 401 * allocate transaction id/block
402 */ 402 */
403 if ((t = TxAnchor.freetid) == 0) { 403 if ((t = TxAnchor.freetid) == 0) {
404 jfs_info("txBegin: waiting for free tid"); 404 jfs_info("txBegin: waiting for free tid");
405 INCREMENT(TxStat.txBegin_freetid); 405 INCREMENT(TxStat.txBegin_freetid);
406 TXN_SLEEP(&TxAnchor.freewait); 406 TXN_SLEEP(&TxAnchor.freewait);
407 goto retry; 407 goto retry;
408 } 408 }
409 409
410 tblk = tid_to_tblock(t); 410 tblk = tid_to_tblock(t);
411 411
412 if ((tblk->next == 0) && !(flag & COMMIT_FORCE)) { 412 if ((tblk->next == 0) && !(flag & COMMIT_FORCE)) {
413 /* Don't let a non-forced transaction take the last tblk */ 413 /* Don't let a non-forced transaction take the last tblk */
414 jfs_info("txBegin: waiting for free tid"); 414 jfs_info("txBegin: waiting for free tid");
415 INCREMENT(TxStat.txBegin_freetid); 415 INCREMENT(TxStat.txBegin_freetid);
416 TXN_SLEEP(&TxAnchor.freewait); 416 TXN_SLEEP(&TxAnchor.freewait);
417 goto retry; 417 goto retry;
418 } 418 }
419 419
420 TxAnchor.freetid = tblk->next; 420 TxAnchor.freetid = tblk->next;
421 421
422 /* 422 /*
423 * initialize transaction 423 * initialize transaction
424 */ 424 */
425 425
426 /* 426 /*
427 * We can't zero the whole thing or we screw up another thread being 427 * We can't zero the whole thing or we screw up another thread being
428 * awakened after sleeping on tblk->waitor 428 * awakened after sleeping on tblk->waitor
429 * 429 *
430 * memset(tblk, 0, sizeof(struct tblock)); 430 * memset(tblk, 0, sizeof(struct tblock));
431 */ 431 */
432 tblk->next = tblk->last = tblk->xflag = tblk->flag = tblk->lsn = 0; 432 tblk->next = tblk->last = tblk->xflag = tblk->flag = tblk->lsn = 0;
433 433
434 tblk->sb = sb; 434 tblk->sb = sb;
435 ++log->logtid; 435 ++log->logtid;
436 tblk->logtid = log->logtid; 436 tblk->logtid = log->logtid;
437 437
438 ++log->active; 438 ++log->active;
439 439
440 HIGHWATERMARK(stattx.maxtid, t); /* statistics */ 440 HIGHWATERMARK(stattx.maxtid, t); /* statistics */
441 INCREMENT(stattx.ntid); /* statistics */ 441 INCREMENT(stattx.ntid); /* statistics */
442 442
443 TXN_UNLOCK(); 443 TXN_UNLOCK();
444 444
445 jfs_info("txBegin: returning tid = %d", t); 445 jfs_info("txBegin: returning tid = %d", t);
446 446
447 return t; 447 return t;
448 } 448 }
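
A sketch of the typical caller shape around txBegin() (error handling elided; not a verbatim call site from this diff). The COMMIT_FORCE flag matters for the barrier logic above: a forced begin skips the log_SYNCBARRIER/log_QUIESCE wait, so a nested transaction cannot deadlock against a barrier raised while its parent is still active.

tid_t tid;
int rc;

tid = txBegin(sb, 0);		/* may sleep on log->syncwait */
/* ... txLock() the metapages this operation touches, modify them ... */
rc = txCommit(tid, 1, &ip, 0);	/* one inode in the commit list */
txEnd(tid);			/* frees the tblock, may lift the barrier */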
449 449
450 /* 450 /*
451 * NAME: txBeginAnon() 451 * NAME: txBeginAnon()
452 * 452 *
453 * FUNCTION: start an anonymous transaction. 453 * FUNCTION: start an anonymous transaction.
454 * Blocks if logsync or available tlocks are low to prevent 454 * Blocks if logsync or available tlocks are low to prevent
455 * anonymous tlocks from depleting supply. 455 * anonymous tlocks from depleting supply.
456 * 456 *
457 * PARAMETER: sb - superblock 457 * PARAMETER: sb - superblock
458 * 458 *
459 * RETURN: none 459 * RETURN: none
460 */ 460 */
461 void txBeginAnon(struct super_block *sb) 461 void txBeginAnon(struct super_block *sb)
462 { 462 {
463 struct jfs_log *log; 463 struct jfs_log *log;
464 464
465 log = JFS_SBI(sb)->log; 465 log = JFS_SBI(sb)->log;
466 466
467 TXN_LOCK(); 467 TXN_LOCK();
468 INCREMENT(TxStat.txBeginAnon); 468 INCREMENT(TxStat.txBeginAnon);
469 469
470 retry: 470 retry:
471 /* 471 /*
472 * synchronize with logsync barrier 472 * synchronize with logsync barrier
473 */ 473 */
474 if (test_bit(log_SYNCBARRIER, &log->flag) || 474 if (test_bit(log_SYNCBARRIER, &log->flag) ||
475 test_bit(log_QUIESCE, &log->flag)) { 475 test_bit(log_QUIESCE, &log->flag)) {
476 INCREMENT(TxStat.txBeginAnon_barrier); 476 INCREMENT(TxStat.txBeginAnon_barrier);
477 TXN_SLEEP(&log->syncwait); 477 TXN_SLEEP(&log->syncwait);
478 goto retry; 478 goto retry;
479 } 479 }
480 480
481 /* 481 /*
482 * Don't begin transaction if we're getting starved for tlocks 482 * Don't begin transaction if we're getting starved for tlocks
483 */ 483 */
484 if (TxAnchor.tlocksInUse > TxLockVHWM) { 484 if (TxAnchor.tlocksInUse > TxLockVHWM) {
485 INCREMENT(TxStat.txBeginAnon_lockslow); 485 INCREMENT(TxStat.txBeginAnon_lockslow);
486 TXN_SLEEP(&TxAnchor.lowlockwait); 486 TXN_SLEEP(&TxAnchor.lowlockwait);
487 goto retry; 487 goto retry;
488 } 488 }
489 TXN_UNLOCK(); 489 TXN_UNLOCK();
490 } 490 }
491 491
492 /* 492 /*
493 * txEnd() 493 * txEnd()
494 * 494 *
495 * function: free specified transaction block. 495 * function: free specified transaction block.
496 * 496 *
497 * logsync barrier processing: 497 * logsync barrier processing:
498 * 498 *
499 * serialization: 499 * serialization:
500 */ 500 */
501 void txEnd(tid_t tid) 501 void txEnd(tid_t tid)
502 { 502 {
503 struct tblock *tblk = tid_to_tblock(tid); 503 struct tblock *tblk = tid_to_tblock(tid);
504 struct jfs_log *log; 504 struct jfs_log *log;
505 505
506 jfs_info("txEnd: tid = %d", tid); 506 jfs_info("txEnd: tid = %d", tid);
507 TXN_LOCK(); 507 TXN_LOCK();
508 508
509 /* 509 /*
510 * wakeup transactions waiting on the page locked 510 * wakeup transactions waiting on the page locked
511 * by the current transaction 511 * by the current transaction
512 */ 512 */
513 TXN_WAKEUP(&tblk->waitor); 513 TXN_WAKEUP(&tblk->waitor);
514 514
515 log = JFS_SBI(tblk->sb)->log; 515 log = JFS_SBI(tblk->sb)->log;
516 516
517 /* 517 /*
518 * Lazy commit thread can't free this guy until we mark it UNLOCKED, 518 * Lazy commit thread can't free this guy until we mark it UNLOCKED,
519 * otherwise, we would be left with a transaction that may have been 519 * otherwise, we would be left with a transaction that may have been
520 * reused. 520 * reused.
521 * 521 *
522 * Lazy commit thread will turn off tblkGC_LAZY before calling this 522 * Lazy commit thread will turn off tblkGC_LAZY before calling this
523 * routine. 523 * routine.
524 */ 524 */
525 if (tblk->flag & tblkGC_LAZY) { 525 if (tblk->flag & tblkGC_LAZY) {
526 jfs_info("txEnd called w/lazy tid: %d, tblk = 0x%p", tid, tblk); 526 jfs_info("txEnd called w/lazy tid: %d, tblk = 0x%p", tid, tblk);
527 TXN_UNLOCK(); 527 TXN_UNLOCK();
528 528
529 spin_lock_irq(&log->gclock); // LOGGC_LOCK 529 spin_lock_irq(&log->gclock); // LOGGC_LOCK
530 tblk->flag |= tblkGC_UNLOCKED; 530 tblk->flag |= tblkGC_UNLOCKED;
531 spin_unlock_irq(&log->gclock); // LOGGC_UNLOCK 531 spin_unlock_irq(&log->gclock); // LOGGC_UNLOCK
532 return; 532 return;
533 } 533 }
534 534
535 jfs_info("txEnd: tid: %d, tblk = 0x%p", tid, tblk); 535 jfs_info("txEnd: tid: %d, tblk = 0x%p", tid, tblk);
536 536
537 assert(tblk->next == 0); 537 assert(tblk->next == 0);
538 538
539 /* 539 /*
540 * insert tblock back on freelist 540 * insert tblock back on freelist
541 */ 541 */
542 tblk->next = TxAnchor.freetid; 542 tblk->next = TxAnchor.freetid;
543 TxAnchor.freetid = tid; 543 TxAnchor.freetid = tid;
544 544
545 /* 545 /*
546 * mark the tblock not active 546 * mark the tblock not active
547 */ 547 */
548 if (--log->active == 0) { 548 if (--log->active == 0) {
549 clear_bit(log_FLUSH, &log->flag); 549 clear_bit(log_FLUSH, &log->flag);
550 550
551 /* 551 /*
552 * synchronize with logsync barrier 552 * synchronize with logsync barrier
553 */ 553 */
554 if (test_bit(log_SYNCBARRIER, &log->flag)) { 554 if (test_bit(log_SYNCBARRIER, &log->flag)) {
555 TXN_UNLOCK();
556
557 /* write dirty metadata & forward log syncpt */
558 jfs_syncpt(log, 1);
559
555 jfs_info("log barrier off: 0x%x", log->lsn); 560 jfs_info("log barrier off: 0x%x", log->lsn);
556 561
557 /* enable new transactions start */ 562 /* enable new transactions start */
558 clear_bit(log_SYNCBARRIER, &log->flag); 563 clear_bit(log_SYNCBARRIER, &log->flag);
559 564
560 /* wakeup all waitors for logsync barrier */ 565 /* wakeup all waitors for logsync barrier */
561 TXN_WAKEUP(&log->syncwait); 566 TXN_WAKEUP(&log->syncwait);
562
563 TXN_UNLOCK();
564
565 /* forward log syncpt */
566 jfs_syncpt(log);
567 567
568 goto wakeup; 568 goto wakeup;
569 } 569 }
570 } 570 }
571 571
572 TXN_UNLOCK(); 572 TXN_UNLOCK();
573 wakeup: 573 wakeup:
574 /* 574 /*
575 * wakeup all waitors for a free tblock 575 * wakeup all waitors for a free tblock
576 */ 576 */
577 TXN_WAKEUP(&TxAnchor.freewait); 577 TXN_WAKEUP(&TxAnchor.freewait);
578 } 578 }
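
The reordering inside txEnd() above is the substance of this change. Condensed, with the old ordering for contrast (comments are editorial, not from the source):

/* Old: clear_bit(log_SYNCBARRIER); TXN_WAKEUP(&log->syncwait);
 *      TXN_UNLOCK(); jfs_syncpt(log);
 *      -- new transactions could re-lock hot metadata pages before
 *         the sync point was forced.
 *
 * New, inside the log->active == 0 && log_SYNCBARRIER window: */
TXN_UNLOCK();				/* can't do I/O under TXN_LOCK */
jfs_syncpt(log, 1);			/* every tx committed, none may
					 * start: write dirty metadata,
					 * then advance the sync point */
clear_bit(log_SYNCBARRIER, &log->flag);	/* re-enable new transactions */
TXN_WAKEUP(&log->syncwait);		/* release the barrier waiters */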
579 579
580 /* 580 /*
581 * txLock() 581 * txLock()
582 * 582 *
583 * function: acquire a transaction lock on the specified <mp> 583 * function: acquire a transaction lock on the specified <mp>
584 * 584 *
585 * parameter: 585 * parameter:
586 * 586 *
587 * return: transaction lock id 587 * return: transaction lock id
588 * 588 *
589 * serialization: 589 * serialization:
590 */ 590 */
591 struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp, 591 struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
592 int type) 592 int type)
593 { 593 {
594 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 594 struct jfs_inode_info *jfs_ip = JFS_IP(ip);
595 int dir_xtree = 0; 595 int dir_xtree = 0;
596 lid_t lid; 596 lid_t lid;
597 tid_t xtid; 597 tid_t xtid;
598 struct tlock *tlck; 598 struct tlock *tlck;
599 struct xtlock *xtlck; 599 struct xtlock *xtlck;
600 struct linelock *linelock; 600 struct linelock *linelock;
601 xtpage_t *p; 601 xtpage_t *p;
602 struct tblock *tblk; 602 struct tblock *tblk;
603 603
604 TXN_LOCK(); 604 TXN_LOCK();
605 605
606 if (S_ISDIR(ip->i_mode) && (type & tlckXTREE) && 606 if (S_ISDIR(ip->i_mode) && (type & tlckXTREE) &&
607 !(mp->xflag & COMMIT_PAGE)) { 607 !(mp->xflag & COMMIT_PAGE)) {
608 /* 608 /*
609 * Directory inode is special. It can have both an xtree tlock 609 * Directory inode is special. It can have both an xtree tlock
610 * and a dtree tlock associated with it. 610 * and a dtree tlock associated with it.
611 */ 611 */
612 dir_xtree = 1; 612 dir_xtree = 1;
613 lid = jfs_ip->xtlid; 613 lid = jfs_ip->xtlid;
614 } else 614 } else
615 lid = mp->lid; 615 lid = mp->lid;
616 616
617 /* is page not locked by a transaction ? */ 617 /* is page not locked by a transaction ? */
618 if (lid == 0) 618 if (lid == 0)
619 goto allocateLock; 619 goto allocateLock;
620 620
621 jfs_info("txLock: tid:%d ip:0x%p mp:0x%p lid:%d", tid, ip, mp, lid); 621 jfs_info("txLock: tid:%d ip:0x%p mp:0x%p lid:%d", tid, ip, mp, lid);
622 622
623 /* is page locked by the requester transaction ? */ 623 /* is page locked by the requester transaction ? */
624 tlck = lid_to_tlock(lid); 624 tlck = lid_to_tlock(lid);
625 if ((xtid = tlck->tid) == tid) { 625 if ((xtid = tlck->tid) == tid) {
626 TXN_UNLOCK(); 626 TXN_UNLOCK();
627 goto grantLock; 627 goto grantLock;
628 } 628 }
629 629
630 /* 630 /*
631 * is page locked by anonymous transaction/lock ? 631 * is page locked by anonymous transaction/lock ?
632 * 632 *
633 * (page update without transaction (i.e., file write) is 633 * (page update without transaction (i.e., file write) is
634 * locked under anonymous transaction tid = 0: 634 * locked under anonymous transaction tid = 0:
635 * anonymous tlocks maintained on anonymous tlock list of 635 * anonymous tlocks maintained on anonymous tlock list of
636 * the inode of the page and available to all anonymous 636 * the inode of the page and available to all anonymous
637 * transactions until txCommit() time at which point 637 * transactions until txCommit() time at which point
638 * they are transferred to the transaction tlock list of 638 * they are transferred to the transaction tlock list of
639 * the committing transaction of the inode) 639 * the committing transaction of the inode)
640 */ 640 */
641 if (xtid == 0) { 641 if (xtid == 0) {
642 tlck->tid = tid; 642 tlck->tid = tid;
643 TXN_UNLOCK(); 643 TXN_UNLOCK();
644 tblk = tid_to_tblock(tid); 644 tblk = tid_to_tblock(tid);
645 /* 645 /*
646 * The order of the tlocks in the transaction is important 646 * The order of the tlocks in the transaction is important
647 * (during truncate, child xtree pages must be freed before 647 * (during truncate, child xtree pages must be freed before
648 * parent's tlocks change the working map). 648 * parent's tlocks change the working map).
649 * Take tlock off anonymous list and add to tail of 649 * Take tlock off anonymous list and add to tail of
650 * transaction list 650 * transaction list
651 * 651 *
652 * Note: We really need to get rid of the tid & lid and 652 * Note: We really need to get rid of the tid & lid and
653 * use list_head's. This code is getting UGLY! 653 * use list_head's. This code is getting UGLY!
654 */ 654 */
655 if (jfs_ip->atlhead == lid) { 655 if (jfs_ip->atlhead == lid) {
656 if (jfs_ip->atltail == lid) { 656 if (jfs_ip->atltail == lid) {
657 /* only anonymous txn. 657 /* only anonymous txn.
658 * Remove from anon_list 658 * Remove from anon_list
659 */ 659 */
660 list_del_init(&jfs_ip->anon_inode_list); 660 list_del_init(&jfs_ip->anon_inode_list);
661 } 661 }
662 jfs_ip->atlhead = tlck->next; 662 jfs_ip->atlhead = tlck->next;
663 } else { 663 } else {
664 lid_t last; 664 lid_t last;
665 for (last = jfs_ip->atlhead; 665 for (last = jfs_ip->atlhead;
666 lid_to_tlock(last)->next != lid; 666 lid_to_tlock(last)->next != lid;
667 last = lid_to_tlock(last)->next) { 667 last = lid_to_tlock(last)->next) {
668 assert(last); 668 assert(last);
669 } 669 }
670 lid_to_tlock(last)->next = tlck->next; 670 lid_to_tlock(last)->next = tlck->next;
671 if (jfs_ip->atltail == lid) 671 if (jfs_ip->atltail == lid)
672 jfs_ip->atltail = last; 672 jfs_ip->atltail = last;
673 } 673 }
674 674
675 /* insert the tlock at tail of transaction tlock list */ 675 /* insert the tlock at tail of transaction tlock list */
676 676
677 if (tblk->next) 677 if (tblk->next)
678 lid_to_tlock(tblk->last)->next = lid; 678 lid_to_tlock(tblk->last)->next = lid;
679 else 679 else
680 tblk->next = lid; 680 tblk->next = lid;
681 tlck->next = 0; 681 tlck->next = 0;
682 tblk->last = lid; 682 tblk->last = lid;
683 683
684 goto grantLock; 684 goto grantLock;
685 } 685 }
686 686
687 goto waitLock; 687 goto waitLock;
688 688
689 /* 689 /*
690 * allocate a tlock 690 * allocate a tlock
691 */ 691 */
692 allocateLock: 692 allocateLock:
693 lid = txLockAlloc(); 693 lid = txLockAlloc();
694 tlck = lid_to_tlock(lid); 694 tlck = lid_to_tlock(lid);
695 695
696 /* 696 /*
697 * initialize tlock 697 * initialize tlock
698 */ 698 */
699 tlck->tid = tid; 699 tlck->tid = tid;
700 700
701 TXN_UNLOCK(); 701 TXN_UNLOCK();
702 702
703 /* mark tlock for meta-data page */ 703 /* mark tlock for meta-data page */
704 if (mp->xflag & COMMIT_PAGE) { 704 if (mp->xflag & COMMIT_PAGE) {
705 705
706 tlck->flag = tlckPAGELOCK; 706 tlck->flag = tlckPAGELOCK;
707 707
708 /* mark the page dirty and nohomeok */ 708 /* mark the page dirty and nohomeok */
709 metapage_nohomeok(mp); 709 metapage_nohomeok(mp);
710 710
711 jfs_info("locking mp = 0x%p, nohomeok = %d tid = %d tlck = 0x%p", 711 jfs_info("locking mp = 0x%p, nohomeok = %d tid = %d tlck = 0x%p",
712 mp, mp->nohomeok, tid, tlck); 712 mp, mp->nohomeok, tid, tlck);
713 713
714 /* if anonymous transaction, and buffer is on the group 714 /* if anonymous transaction, and buffer is on the group
715 * commit synclist, mark inode to show this. This will 715 * commit synclist, mark inode to show this. This will
716 * prevent the buffer from being marked nohomeok for too 716 * prevent the buffer from being marked nohomeok for too
717 * long a time. 717 * long a time.
718 */ 718 */
719 if ((tid == 0) && mp->lsn) 719 if ((tid == 0) && mp->lsn)
720 set_cflag(COMMIT_Synclist, ip); 720 set_cflag(COMMIT_Synclist, ip);
721 } 721 }
722 /* mark tlock for in-memory inode */ 722 /* mark tlock for in-memory inode */
723 else 723 else
724 tlck->flag = tlckINODELOCK; 724 tlck->flag = tlckINODELOCK;
725 725
726 tlck->type = 0; 726 tlck->type = 0;
727 727
728 /* bind the tlock and the page */ 728 /* bind the tlock and the page */
729 tlck->ip = ip; 729 tlck->ip = ip;
730 tlck->mp = mp; 730 tlck->mp = mp;
731 if (dir_xtree) 731 if (dir_xtree)
732 jfs_ip->xtlid = lid; 732 jfs_ip->xtlid = lid;
733 else 733 else
734 mp->lid = lid; 734 mp->lid = lid;
735 735
736 /* 736 /*
737 * enqueue transaction lock to transaction/inode 737 * enqueue transaction lock to transaction/inode
738 */ 738 */
739 /* insert the tlock at tail of transaction tlock list */ 739 /* insert the tlock at tail of transaction tlock list */
740 if (tid) { 740 if (tid) {
741 tblk = tid_to_tblock(tid); 741 tblk = tid_to_tblock(tid);
742 if (tblk->next) 742 if (tblk->next)
743 lid_to_tlock(tblk->last)->next = lid; 743 lid_to_tlock(tblk->last)->next = lid;
744 else 744 else
745 tblk->next = lid; 745 tblk->next = lid;
746 tlck->next = 0; 746 tlck->next = 0;
747 tblk->last = lid; 747 tblk->last = lid;
748 } 748 }
749 /* anonymous transaction: 749 /* anonymous transaction:
750 * insert the tlock at head of inode anonymous tlock list 750 * insert the tlock at head of inode anonymous tlock list
751 */ 751 */
752 else { 752 else {
753 tlck->next = jfs_ip->atlhead; 753 tlck->next = jfs_ip->atlhead;
754 jfs_ip->atlhead = lid; 754 jfs_ip->atlhead = lid;
755 if (tlck->next == 0) { 755 if (tlck->next == 0) {
756 /* This inode's first anonymous transaction */ 756 /* This inode's first anonymous transaction */
757 jfs_ip->atltail = lid; 757 jfs_ip->atltail = lid;
758 TXN_LOCK(); 758 TXN_LOCK();
759 list_add_tail(&jfs_ip->anon_inode_list, 759 list_add_tail(&jfs_ip->anon_inode_list,
760 &TxAnchor.anon_list); 760 &TxAnchor.anon_list);
761 TXN_UNLOCK(); 761 TXN_UNLOCK();
762 } 762 }
763 } 763 }
764 764
765 /* initialize type dependent area for linelock */ 765 /* initialize type dependent area for linelock */
766 linelock = (struct linelock *) & tlck->lock; 766 linelock = (struct linelock *) & tlck->lock;
767 linelock->next = 0; 767 linelock->next = 0;
768 linelock->flag = tlckLINELOCK; 768 linelock->flag = tlckLINELOCK;
769 linelock->maxcnt = TLOCKSHORT; 769 linelock->maxcnt = TLOCKSHORT;
770 linelock->index = 0; 770 linelock->index = 0;
771 771
772 switch (type & tlckTYPE) { 772 switch (type & tlckTYPE) {
773 case tlckDTREE: 773 case tlckDTREE:
774 linelock->l2linesize = L2DTSLOTSIZE; 774 linelock->l2linesize = L2DTSLOTSIZE;
775 break; 775 break;
776 776
777 case tlckXTREE: 777 case tlckXTREE:
778 linelock->l2linesize = L2XTSLOTSIZE; 778 linelock->l2linesize = L2XTSLOTSIZE;
779 779
780 xtlck = (struct xtlock *) linelock; 780 xtlck = (struct xtlock *) linelock;
781 xtlck->header.offset = 0; 781 xtlck->header.offset = 0;
782 xtlck->header.length = 2; 782 xtlck->header.length = 2;
783 783
784 if (type & tlckNEW) { 784 if (type & tlckNEW) {
785 xtlck->lwm.offset = XTENTRYSTART; 785 xtlck->lwm.offset = XTENTRYSTART;
786 } else { 786 } else {
787 if (mp->xflag & COMMIT_PAGE) 787 if (mp->xflag & COMMIT_PAGE)
788 p = (xtpage_t *) mp->data; 788 p = (xtpage_t *) mp->data;
789 else 789 else
790 p = &jfs_ip->i_xtroot; 790 p = &jfs_ip->i_xtroot;
791 xtlck->lwm.offset = 791 xtlck->lwm.offset =
792 le16_to_cpu(p->header.nextindex); 792 le16_to_cpu(p->header.nextindex);
793 } 793 }
794 xtlck->lwm.length = 0; /* ! */ 794 xtlck->lwm.length = 0; /* ! */
795 xtlck->twm.offset = 0; 795 xtlck->twm.offset = 0;
796 xtlck->hwm.offset = 0; 796 xtlck->hwm.offset = 0;
797 797
798 xtlck->index = 2; 798 xtlck->index = 2;
799 break; 799 break;
800 800
801 case tlckINODE: 801 case tlckINODE:
802 linelock->l2linesize = L2INODESLOTSIZE; 802 linelock->l2linesize = L2INODESLOTSIZE;
803 break; 803 break;
804 804
805 case tlckDATA: 805 case tlckDATA:
806 linelock->l2linesize = L2DATASLOTSIZE; 806 linelock->l2linesize = L2DATASLOTSIZE;
807 break; 807 break;
808 808
809 default: 809 default:
810 jfs_err("UFO tlock:0x%p", tlck); 810 jfs_err("UFO tlock:0x%p", tlck);
811 } 811 }
812 812
813 /* 813 /*
814 * update tlock vector 814 * update tlock vector
815 */ 815 */
816 grantLock: 816 grantLock:
817 tlck->type |= type; 817 tlck->type |= type;
818 818
819 return tlck; 819 return tlck;
820 820
821 /* 821 /*
822 * page is being locked by another transaction: 822 * page is being locked by another transaction:
823 */ 823 */
824 waitLock: 824 waitLock:
825 /* Only locks on ipimap or ipaimap should reach here */ 825 /* Only locks on ipimap or ipaimap should reach here */
826 /* assert(jfs_ip->fileset == AGGREGATE_I); */ 826 /* assert(jfs_ip->fileset == AGGREGATE_I); */
827 if (jfs_ip->fileset != AGGREGATE_I) { 827 if (jfs_ip->fileset != AGGREGATE_I) {
828 jfs_err("txLock: trying to lock locked page!"); 828 jfs_err("txLock: trying to lock locked page!");
829 dump_mem("ip", ip, sizeof(struct inode)); 829 dump_mem("ip", ip, sizeof(struct inode));
830 dump_mem("mp", mp, sizeof(struct metapage)); 830 dump_mem("mp", mp, sizeof(struct metapage));
831 dump_mem("Locker's tblk", tid_to_tblock(tid), 831 dump_mem("Locker's tblk", tid_to_tblock(tid),
832 sizeof(struct tblock)); 832 sizeof(struct tblock));
833 dump_mem("Tlock", tlck, sizeof(struct tlock)); 833 dump_mem("Tlock", tlck, sizeof(struct tlock));
834 BUG(); 834 BUG();
835 } 835 }
836 INCREMENT(stattx.waitlock); /* statistics */ 836 INCREMENT(stattx.waitlock); /* statistics */
837 TXN_UNLOCK(); 837 TXN_UNLOCK();
838 release_metapage(mp); 838 release_metapage(mp);
839 TXN_LOCK(); 839 TXN_LOCK();
840 xtid = tlck->tid; /* reacquire after dropping TXN_LOCK */ 840 xtid = tlck->tid; /* reacquire after dropping TXN_LOCK */
841 841
842 jfs_info("txLock: in waitLock, tid = %d, xtid = %d, lid = %d", 842 jfs_info("txLock: in waitLock, tid = %d, xtid = %d, lid = %d",
843 tid, xtid, lid); 843 tid, xtid, lid);
844 844
845 /* Recheck everything since dropping TXN_LOCK */ 845 /* Recheck everything since dropping TXN_LOCK */
846 if (xtid && (tlck->mp == mp) && (mp->lid == lid)) 846 if (xtid && (tlck->mp == mp) && (mp->lid == lid))
847 TXN_SLEEP_DROP_LOCK(&tid_to_tblock(xtid)->waitor); 847 TXN_SLEEP_DROP_LOCK(&tid_to_tblock(xtid)->waitor);
848 else 848 else
849 TXN_UNLOCK(); 849 TXN_UNLOCK();
850 jfs_info("txLock: awakened tid = %d, lid = %d", tid, lid); 850 jfs_info("txLock: awakened tid = %d, lid = %d", tid, lid);
851 851
852 return NULL; 852 return NULL;
853 } 853 }
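
When txLock() takes the waitLock path it releases the metapage, sleeps on the locking transaction's waitor queue, and returns NULL, so its callers must re-read the page and retry. A hedged sketch of that caller-side loop (the read_metapage() arguments and the tlckXTREE type are illustrative, not copied from a real call site):

struct metapage *mp;
struct tlock *tlck;

do {
	/* re-acquire the page; the old reference was released inside
	 * txLock() before it slept on the locker's waitor queue */
	mp = read_metapage(ip, lblkno, PSIZE, 0);
	tlck = txLock(tid, ip, mp, tlckXTREE);
} while (tlck == NULL);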
854 854
855 /* 855 /*
856 * NAME: txRelease() 856 * NAME: txRelease()
857 * 857 *
858 * FUNCTION: Release buffers associated with transaction locks, but don't 858 * FUNCTION: Release buffers associated with transaction locks, but don't
859 * mark homeok yet. This allows other transactions to modify 859 * mark homeok yet. This allows other transactions to modify
860 * buffers, but won't let them go to disk until the commit record 860 * buffers, but won't let them go to disk until the commit record
861 * actually gets written. 861 * actually gets written.
862 * 862 *
863 * PARAMETER: 863 * PARAMETER:
864 * tblk - 864 * tblk -
865 * 865 *
866 * RETURN: Errors from subroutines. 866 * RETURN: Errors from subroutines.
867 */ 867 */
868 static void txRelease(struct tblock * tblk) 868 static void txRelease(struct tblock * tblk)
869 { 869 {
870 struct metapage *mp; 870 struct metapage *mp;
871 lid_t lid; 871 lid_t lid;
872 struct tlock *tlck; 872 struct tlock *tlck;
873 873
874 TXN_LOCK(); 874 TXN_LOCK();
875 875
876 for (lid = tblk->next; lid; lid = tlck->next) { 876 for (lid = tblk->next; lid; lid = tlck->next) {
877 tlck = lid_to_tlock(lid); 877 tlck = lid_to_tlock(lid);
878 if ((mp = tlck->mp) != NULL && 878 if ((mp = tlck->mp) != NULL &&
879 (tlck->type & tlckBTROOT) == 0) { 879 (tlck->type & tlckBTROOT) == 0) {
880 assert(mp->xflag & COMMIT_PAGE); 880 assert(mp->xflag & COMMIT_PAGE);
881 mp->lid = 0; 881 mp->lid = 0;
882 } 882 }
883 } 883 }
884 884
885 /* 885 /*
886 * wakeup transactions waiting on a page locked 886 * wakeup transactions waiting on a page locked
887 * by the current transaction 887 * by the current transaction
888 */ 888 */
889 TXN_WAKEUP(&tblk->waitor); 889 TXN_WAKEUP(&tblk->waitor);
890 890
891 TXN_UNLOCK(); 891 TXN_UNLOCK();
892 } 892 }
893 893
894 /* 894 /*
895 * NAME: txUnlock() 895 * NAME: txUnlock()
896 * 896 *
897 * FUNCTION: Initiates pageout of pages modified by tid in journalled 897 * FUNCTION: Initiates pageout of pages modified by tid in journalled
898 * objects and frees their lockwords. 898 * objects and frees their lockwords.
899 */ 899 */
900 static void txUnlock(struct tblock * tblk) 900 static void txUnlock(struct tblock * tblk)
901 { 901 {
902 struct tlock *tlck; 902 struct tlock *tlck;
903 struct linelock *linelock; 903 struct linelock *linelock;
904 lid_t lid, next, llid, k; 904 lid_t lid, next, llid, k;
905 struct metapage *mp; 905 struct metapage *mp;
906 struct jfs_log *log; 906 struct jfs_log *log;
907 int difft, diffp; 907 int difft, diffp;
908 unsigned long flags; 908 unsigned long flags;
909 909
910 jfs_info("txUnlock: tblk = 0x%p", tblk); 910 jfs_info("txUnlock: tblk = 0x%p", tblk);
911 log = JFS_SBI(tblk->sb)->log; 911 log = JFS_SBI(tblk->sb)->log;
912 912
913 /* 913 /*
914 * mark page under tlock homeok (its log has been written): 914 * mark page under tlock homeok (its log has been written):
915 */ 915 */
916 for (lid = tblk->next; lid; lid = next) { 916 for (lid = tblk->next; lid; lid = next) {
917 tlck = lid_to_tlock(lid); 917 tlck = lid_to_tlock(lid);
918 next = tlck->next; 918 next = tlck->next;
919 919
920 jfs_info("unlocking lid = %d, tlck = 0x%p", lid, tlck); 920 jfs_info("unlocking lid = %d, tlck = 0x%p", lid, tlck);
921 921
922 /* unbind page from tlock */ 922 /* unbind page from tlock */
923 if ((mp = tlck->mp) != NULL && 923 if ((mp = tlck->mp) != NULL &&
924 (tlck->type & tlckBTROOT) == 0) { 924 (tlck->type & tlckBTROOT) == 0) {
925 assert(mp->xflag & COMMIT_PAGE); 925 assert(mp->xflag & COMMIT_PAGE);
926 926
927 /* hold buffer 927 /* hold buffer
928 */ 928 */
929 hold_metapage(mp); 929 hold_metapage(mp);
930 930
931 assert(mp->nohomeok > 0); 931 assert(mp->nohomeok > 0);
932 _metapage_homeok(mp); 932 _metapage_homeok(mp);
933 933
934 /* inherit younger/larger clsn */ 934 /* inherit younger/larger clsn */
935 LOGSYNC_LOCK(log, flags); 935 LOGSYNC_LOCK(log, flags);
936 if (mp->clsn) { 936 if (mp->clsn) {
937 logdiff(difft, tblk->clsn, log); 937 logdiff(difft, tblk->clsn, log);
938 logdiff(diffp, mp->clsn, log); 938 logdiff(diffp, mp->clsn, log);
939 if (difft > diffp) 939 if (difft > diffp)
940 mp->clsn = tblk->clsn; 940 mp->clsn = tblk->clsn;
941 } else 941 } else
942 mp->clsn = tblk->clsn; 942 mp->clsn = tblk->clsn;
943 LOGSYNC_UNLOCK(log, flags); 943 LOGSYNC_UNLOCK(log, flags);
944 944
945 assert(!(tlck->flag & tlckFREEPAGE)); 945 assert(!(tlck->flag & tlckFREEPAGE));
946 946
947 put_metapage(mp); 947 put_metapage(mp);
948 } 948 }
949 949
950 /* insert tlock, and linelock(s) of the tlock if any, 950 /* insert tlock, and linelock(s) of the tlock if any,
951 * at head of freelist 951 * at head of freelist
952 */ 952 */
953 TXN_LOCK(); 953 TXN_LOCK();
954 954
955 llid = ((struct linelock *) & tlck->lock)->next; 955 llid = ((struct linelock *) & tlck->lock)->next;
956 while (llid) { 956 while (llid) {
957 linelock = (struct linelock *) lid_to_tlock(llid); 957 linelock = (struct linelock *) lid_to_tlock(llid);
958 k = linelock->next; 958 k = linelock->next;
959 txLockFree(llid); 959 txLockFree(llid);
960 llid = k; 960 llid = k;
961 } 961 }
962 txLockFree(lid); 962 txLockFree(lid);
963 963
964 TXN_UNLOCK(); 964 TXN_UNLOCK();
965 } 965 }
966 tblk->next = tblk->last = 0; 966 tblk->next = tblk->last = 0;
967 967
968 /* 968 /*
969 * remove tblock from logsynclist 969 * remove tblock from logsynclist
970 * (allocation map pages inherited lsn of tblk and 970 * (allocation map pages inherited lsn of tblk and
971 * have been inserted in logsync list at txUpdateMap()) 971 * have been inserted in logsync list at txUpdateMap())
972 */ 972 */
973 if (tblk->lsn) { 973 if (tblk->lsn) {
974 LOGSYNC_LOCK(log, flags); 974 LOGSYNC_LOCK(log, flags);
975 log->count--; 975 log->count--;
976 list_del(&tblk->synclist); 976 list_del(&tblk->synclist);
977 LOGSYNC_UNLOCK(log, flags); 977 LOGSYNC_UNLOCK(log, flags);
978 } 978 }
979 } 979 }
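
The "inherit younger/larger clsn" step above cannot compare clsn values numerically, because the log is circular; it compares distances from the sync point via logdiff(). A worked example (values invented for illustration):

/* With log->syncpt = 0xf000 and log->logsize = 0x10000:
 *
 *   tblk->clsn = 0x0200 -> difft = 0x0200 - 0xf000 + 0x10000 = 0x1200
 *   mp->clsn   = 0xf800 -> diffp = 0xf800 - 0xf000           = 0x0800
 *
 * difft > diffp, so mp inherits tblk->clsn: 0x0200 is the younger
 * commit lsn even though it is numerically smaller than 0xf800. */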
980 980
981 /* 981 /*
982 * txMaplock() 982 * txMaplock()
983 * 983 *
984 * function: allocate a transaction lock for freed page/entry; 984 * function: allocate a transaction lock for freed page/entry;
985 * for freed page, maplock is used as xtlock/dtlock type; 985 * for freed page, maplock is used as xtlock/dtlock type;
986 */ 986 */
987 struct tlock *txMaplock(tid_t tid, struct inode *ip, int type) 987 struct tlock *txMaplock(tid_t tid, struct inode *ip, int type)
988 { 988 {
989 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 989 struct jfs_inode_info *jfs_ip = JFS_IP(ip);
990 lid_t lid; 990 lid_t lid;
991 struct tblock *tblk; 991 struct tblock *tblk;
992 struct tlock *tlck; 992 struct tlock *tlck;
993 struct maplock *maplock; 993 struct maplock *maplock;
994 994
995 TXN_LOCK(); 995 TXN_LOCK();
996 996
997 /* 997 /*
998 * allocate a tlock 998 * allocate a tlock
999 */ 999 */
1000 lid = txLockAlloc(); 1000 lid = txLockAlloc();
1001 tlck = lid_to_tlock(lid); 1001 tlck = lid_to_tlock(lid);
1002 1002
1003 /* 1003 /*
1004 * initialize tlock 1004 * initialize tlock
1005 */ 1005 */
1006 tlck->tid = tid; 1006 tlck->tid = tid;
1007 1007
1008 /* bind the tlock and the object */ 1008 /* bind the tlock and the object */
1009 tlck->flag = tlckINODELOCK; 1009 tlck->flag = tlckINODELOCK;
1010 tlck->ip = ip; 1010 tlck->ip = ip;
1011 tlck->mp = NULL; 1011 tlck->mp = NULL;
1012 1012
1013 tlck->type = type; 1013 tlck->type = type;
1014 1014
1015 /* 1015 /*
1016 * enqueue transaction lock to transaction/inode 1016 * enqueue transaction lock to transaction/inode
1017 */ 1017 */
1018 /* insert the tlock at tail of transaction tlock list */ 1018 /* insert the tlock at tail of transaction tlock list */
1019 if (tid) { 1019 if (tid) {
1020 tblk = tid_to_tblock(tid); 1020 tblk = tid_to_tblock(tid);
1021 if (tblk->next) 1021 if (tblk->next)
1022 lid_to_tlock(tblk->last)->next = lid; 1022 lid_to_tlock(tblk->last)->next = lid;
1023 else 1023 else
1024 tblk->next = lid; 1024 tblk->next = lid;
1025 tlck->next = 0; 1025 tlck->next = 0;
1026 tblk->last = lid; 1026 tblk->last = lid;
1027 } 1027 }
1028 /* anonymous transaction: 1028 /* anonymous transaction:
1029 * insert the tlock at head of inode anonymous tlock list 1029 * insert the tlock at head of inode anonymous tlock list
1030 */ 1030 */
1031 else { 1031 else {
1032 tlck->next = jfs_ip->atlhead; 1032 tlck->next = jfs_ip->atlhead;
1033 jfs_ip->atlhead = lid; 1033 jfs_ip->atlhead = lid;
1034 if (tlck->next == 0) { 1034 if (tlck->next == 0) {
1035 /* This inode's first anonymous transaction */ 1035 /* This inode's first anonymous transaction */
1036 jfs_ip->atltail = lid; 1036 jfs_ip->atltail = lid;
1037 list_add_tail(&jfs_ip->anon_inode_list, 1037 list_add_tail(&jfs_ip->anon_inode_list,
1038 &TxAnchor.anon_list); 1038 &TxAnchor.anon_list);
1039 } 1039 }
1040 } 1040 }
1041 1041
1042 TXN_UNLOCK(); 1042 TXN_UNLOCK();
1043 1043
1044 /* initialize type dependent area for maplock */ 1044 /* initialize type dependent area for maplock */
1045 maplock = (struct maplock *) & tlck->lock; 1045 maplock = (struct maplock *) & tlck->lock;
1046 maplock->next = 0; 1046 maplock->next = 0;
1047 maplock->maxcnt = 0; 1047 maplock->maxcnt = 0;
1048 maplock->index = 0; 1048 maplock->index = 0;
1049 1049
1050 return tlck; 1050 return tlck;
1051 } 1051 }
1052 1052
1053 /* 1053 /*
1054 * txLinelock() 1054 * txLinelock()
1055 * 1055 *
1056 * function: allocate a transaction lock for log vector list 1056 * function: allocate a transaction lock for log vector list
1057 */ 1057 */
1058 struct linelock *txLinelock(struct linelock * tlock) 1058 struct linelock *txLinelock(struct linelock * tlock)
1059 { 1059 {
1060 lid_t lid; 1060 lid_t lid;
1061 struct tlock *tlck; 1061 struct tlock *tlck;
1062 struct linelock *linelock; 1062 struct linelock *linelock;
1063 1063
1064 TXN_LOCK(); 1064 TXN_LOCK();
1065 1065
1066 /* allocate a TxLock structure */ 1066 /* allocate a TxLock structure */
1067 lid = txLockAlloc(); 1067 lid = txLockAlloc();
1068 tlck = lid_to_tlock(lid); 1068 tlck = lid_to_tlock(lid);
1069 1069
1070 TXN_UNLOCK(); 1070 TXN_UNLOCK();
1071 1071
1072 /* initialize linelock */ 1072 /* initialize linelock */
1073 linelock = (struct linelock *) tlck; 1073 linelock = (struct linelock *) tlck;
1074 linelock->next = 0; 1074 linelock->next = 0;
1075 linelock->flag = tlckLINELOCK; 1075 linelock->flag = tlckLINELOCK;
1076 linelock->maxcnt = TLOCKLONG; 1076 linelock->maxcnt = TLOCKLONG;
1077 linelock->index = 0; 1077 linelock->index = 0;
1078 1078
1079 /* append linelock after tlock */ 1079 /* append linelock after tlock */
1080 linelock->next = tlock->next; 1080 linelock->next = tlock->next;
1081 tlock->next = lid; 1081 tlock->next = lid;
1082 1082
1083 return linelock; 1083 return linelock;
1084 } 1084 }
1085 1085
1086 /* 1086 /*
1087 * transaction commit management 1087 * transaction commit management
1088 * ----------------------------- 1088 * -----------------------------
1089 */ 1089 */
1090 1090
1091 /* 1091 /*
1092 * NAME: txCommit() 1092 * NAME: txCommit()
1093 * 1093 *
1094 * FUNCTION: commit the changes to the objects specified in 1094 * FUNCTION: commit the changes to the objects specified in
1095 * clist. For journalled segments only the 1095 * clist. For journalled segments only the
1096 * changes of the caller are committed, i.e. by tid. 1096 * changes of the caller are committed, i.e. by tid.
1097 * for non-journalled segments the data are flushed to 1097 * for non-journalled segments the data are flushed to
1098 * disk and then the change to the disk inode and indirect 1098 * disk and then the change to the disk inode and indirect
1099 * blocks committed (so blocks newly allocated to the 1099 * blocks committed (so blocks newly allocated to the
1100 * segment will be made a part of the segment atomically). 1100 * segment will be made a part of the segment atomically).
1101 * 1101 *
1102 * all of the segments specified in clist must be in 1102 * all of the segments specified in clist must be in
1103 * one file system. no more than 6 segments are needed 1103 * one file system. no more than 6 segments are needed
1104 * to handle all unix svcs. 1104 * to handle all unix svcs.
1105 * 1105 *
1106 * if the i_nlink field (i.e. disk inode link count) 1106 * if the i_nlink field (i.e. disk inode link count)
1107 * is zero, and the type of inode is a regular file or 1107 * is zero, and the type of inode is a regular file or
1108 * directory, or symbolic link, the inode is truncated 1108 * directory, or symbolic link, the inode is truncated
1109 * to zero length. the truncation is committed but the 1109 * to zero length. the truncation is committed but the
1110 * VM resources are unaffected until it is closed (see 1110 * VM resources are unaffected until it is closed (see
1111 * iput and iclose). 1111 * iput and iclose).
1112 * 1112 *
1113 * PARAMETER: 1113 * PARAMETER:
1114 * 1114 *
1115 * RETURN: 1115 * RETURN:
1116 * 1116 *
1117 * serialization: 1117 * serialization:
1118 * on entry the inode lock on each segment is assumed 1118 * on entry the inode lock on each segment is assumed
1119 * to be held. 1119 * to be held.
1120 * 1120 *
1121 * i/o error: 1121 * i/o error:
1122 */ 1122 */
1123 int txCommit(tid_t tid, /* transaction identifier */ 1123 int txCommit(tid_t tid, /* transaction identifier */
1124 int nip, /* number of inodes to commit */ 1124 int nip, /* number of inodes to commit */
1125 struct inode **iplist, /* list of inode to commit */ 1125 struct inode **iplist, /* list of inode to commit */
1126 int flag) 1126 int flag)
1127 { 1127 {
1128 int rc = 0; 1128 int rc = 0;
1129 struct commit cd; 1129 struct commit cd;
1130 struct jfs_log *log; 1130 struct jfs_log *log;
1131 struct tblock *tblk; 1131 struct tblock *tblk;
1132 struct lrd *lrd; 1132 struct lrd *lrd;
1133 int lsn; 1133 int lsn;
1134 struct inode *ip; 1134 struct inode *ip;
1135 struct jfs_inode_info *jfs_ip; 1135 struct jfs_inode_info *jfs_ip;
1136 int k, n; 1136 int k, n;
1137 ino_t top; 1137 ino_t top;
1138 struct super_block *sb; 1138 struct super_block *sb;
1139 1139
1140 jfs_info("txCommit, tid = %d, flag = %d", tid, flag); 1140 jfs_info("txCommit, tid = %d, flag = %d", tid, flag);
1141 /* is read-only file system ? */ 1141 /* is read-only file system ? */
1142 if (isReadOnly(iplist[0])) { 1142 if (isReadOnly(iplist[0])) {
1143 rc = -EROFS; 1143 rc = -EROFS;
1144 goto TheEnd; 1144 goto TheEnd;
1145 } 1145 }
1146 1146
1147 sb = cd.sb = iplist[0]->i_sb; 1147 sb = cd.sb = iplist[0]->i_sb;
1148 cd.tid = tid; 1148 cd.tid = tid;
1149 1149
1150 if (tid == 0) 1150 if (tid == 0)
1151 tid = txBegin(sb, 0); 1151 tid = txBegin(sb, 0);
1152 tblk = tid_to_tblock(tid); 1152 tblk = tid_to_tblock(tid);
1153 1153
1154 /* 1154 /*
1155 * initialize commit structure 1155 * initialize commit structure
1156 */ 1156 */
1157 log = JFS_SBI(sb)->log; 1157 log = JFS_SBI(sb)->log;
1158 cd.log = log; 1158 cd.log = log;
1159 1159
1160 /* initialize log record descriptor in commit */ 1160 /* initialize log record descriptor in commit */
1161 lrd = &cd.lrd; 1161 lrd = &cd.lrd;
1162 lrd->logtid = cpu_to_le32(tblk->logtid); 1162 lrd->logtid = cpu_to_le32(tblk->logtid);
1163 lrd->backchain = 0; 1163 lrd->backchain = 0;
1164 1164
1165 tblk->xflag |= flag; 1165 tblk->xflag |= flag;
1166 1166
1167 if ((flag & (COMMIT_FORCE | COMMIT_SYNC)) == 0) 1167 if ((flag & (COMMIT_FORCE | COMMIT_SYNC)) == 0)
1168 tblk->xflag |= COMMIT_LAZY; 1168 tblk->xflag |= COMMIT_LAZY;
1169 /* 1169 /*
1170 * prepare non-journaled objects for commit 1170 * prepare non-journaled objects for commit
1171 * 1171 *
1172 * flush data pages of non-journaled file 1172 * flush data pages of non-journaled file
1173 * to prevent the file from getting uninitialized disk blocks 1173 * to prevent the file from getting uninitialized disk blocks
1174 * in case of crash. 1174 * in case of crash.
1175 * (new blocks - ) 1175 * (new blocks - )
1176 */ 1176 */
1177 cd.iplist = iplist; 1177 cd.iplist = iplist;
1178 cd.nip = nip; 1178 cd.nip = nip;
1179 1179
1180 /* 1180 /*
1181 * acquire transaction lock on (on-disk) inodes 1181 * acquire transaction lock on (on-disk) inodes
1182 * 1182 *
1183 * update on-disk inode from in-memory inode 1183 * update on-disk inode from in-memory inode
1184 * acquiring transaction locks for AFTER records 1184 * acquiring transaction locks for AFTER records
1185 * on the on-disk inode of file object 1185 * on the on-disk inode of file object
1186 * 1186 *
1187 * sort the inodes array by inode number in descending order 1187 * sort the inodes array by inode number in descending order
1188 * to prevent deadlock when acquiring transaction lock 1188 * to prevent deadlock when acquiring transaction lock
1189 * of on-disk inodes on multiple on-disk inode pages by 1189 * of on-disk inodes on multiple on-disk inode pages by
1190 * multiple concurrent transactions 1190 * multiple concurrent transactions
1191 */ 1191 */
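1191 /* 1191 /*
1192 * illustrative example (not in the original source): with a 1192 * illustrative example (not in the original source): with a
1193 * fixed descending order, transactions committing inode sets 1193 * fixed descending order, transactions committing inode sets
1194 * {3, 9} and {9, 5} both take the tlock on inode 9 first, so 1194 * {3, 9} and {9, 5} both take the tlock on inode 9 first, so
1195 * neither can hold a lower-numbered inode while waiting on a 1195 * neither can hold a lower-numbered inode while waiting on a
1196 * higher one, and the lock-ordering deadlock cannot occur. 1196 * higher one, and the lock-ordering deadlock cannot occur.
1197 */ 1197 */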
1192 for (k = 0; k < cd.nip; k++) { 1192 for (k = 0; k < cd.nip; k++) {
1193 top = (cd.iplist[k])->i_ino; 1193 top = (cd.iplist[k])->i_ino;
1194 for (n = k + 1; n < cd.nip; n++) { 1194 for (n = k + 1; n < cd.nip; n++) {
1195 ip = cd.iplist[n]; 1195 ip = cd.iplist[n];
1196 if (ip->i_ino > top) { 1196 if (ip->i_ino > top) {
1197 top = ip->i_ino; 1197 top = ip->i_ino;
1198 cd.iplist[n] = cd.iplist[k]; 1198 cd.iplist[n] = cd.iplist[k];
1199 cd.iplist[k] = ip; 1199 cd.iplist[k] = ip;
1200 } 1200 }
1201 } 1201 }
1202 1202
1203 ip = cd.iplist[k]; 1203 ip = cd.iplist[k];
1204 jfs_ip = JFS_IP(ip); 1204 jfs_ip = JFS_IP(ip);
1205 1205
1206 /* 1206 /*
1207 * BUGBUG - This code has temporarily been removed. The 1207 * BUGBUG - This code has temporarily been removed. The
1208 * intent is to ensure that any file data is written before 1208 * intent is to ensure that any file data is written before
1209 * the metadata is committed to the journal. This prevents 1209 * the metadata is committed to the journal. This prevents
1210 * uninitialized data from appearing in a file after the 1210 * uninitialized data from appearing in a file after the
1211 * journal has been replayed. (The uninitialized data 1211 * journal has been replayed. (The uninitialized data
1212 * could be sensitive data removed by another user.) 1212 * could be sensitive data removed by another user.)
1213 * 1213 *
1214 * The problem now is that we are holding the IWRITELOCK 1214 * The problem now is that we are holding the IWRITELOCK
1215 * on the inode, and calling filemap_fdatawrite on an 1215 * on the inode, and calling filemap_fdatawrite on an
1216 * unmapped page will cause a deadlock in jfs_get_block. 1216 * unmapped page will cause a deadlock in jfs_get_block.
1217 * 1217 *
1218 * The long term solution is to pare down the use of 1218 * The long term solution is to pare down the use of
1219 * IWRITELOCK. We are currently holding it too long. 1219 * IWRITELOCK. We are currently holding it too long.
1220 * We could also be smarter about which data pages need 1220 * We could also be smarter about which data pages need
1221 * to be written before the transaction is committed and 1221 * to be written before the transaction is committed and
1222 * when we don't need to worry about it at all. 1222 * when we don't need to worry about it at all.
1223 * 1223 *
1224 * if ((!S_ISDIR(ip->i_mode)) 1224 * if ((!S_ISDIR(ip->i_mode))
1225 * && (tblk->flag & COMMIT_DELETE) == 0) { 1225 * && (tblk->flag & COMMIT_DELETE) == 0) {
1226 * filemap_fdatawrite(ip->i_mapping); 1226 * filemap_fdatawrite(ip->i_mapping);
1227 * filemap_fdatawait(ip->i_mapping); 1227 * filemap_fdatawait(ip->i_mapping);
1228 * } 1228 * }
1229 */ 1229 */
1230 1230
1231 /* 1231 /*
1232 * Mark inode as not dirty. It will still be on the dirty 1232 * Mark inode as not dirty. It will still be on the dirty
1233 * inode list, but we'll know not to commit it again unless 1233 * inode list, but we'll know not to commit it again unless
1234 * it gets marked dirty again 1234 * it gets marked dirty again
1235 */ 1235 */
1236 clear_cflag(COMMIT_Dirty, ip); 1236 clear_cflag(COMMIT_Dirty, ip);
1237 1237
1238 /* inherit anonymous tlock(s) of inode */ 1238 /* inherit anonymous tlock(s) of inode */
1239 if (jfs_ip->atlhead) { 1239 if (jfs_ip->atlhead) {
1240 lid_to_tlock(jfs_ip->atltail)->next = tblk->next; 1240 lid_to_tlock(jfs_ip->atltail)->next = tblk->next;
1241 tblk->next = jfs_ip->atlhead; 1241 tblk->next = jfs_ip->atlhead;
1242 if (!tblk->last) 1242 if (!tblk->last)
1243 tblk->last = jfs_ip->atltail; 1243 tblk->last = jfs_ip->atltail;
1244 jfs_ip->atlhead = jfs_ip->atltail = 0; 1244 jfs_ip->atlhead = jfs_ip->atltail = 0;
1245 TXN_LOCK(); 1245 TXN_LOCK();
1246 list_del_init(&jfs_ip->anon_inode_list); 1246 list_del_init(&jfs_ip->anon_inode_list);
1247 TXN_UNLOCK(); 1247 TXN_UNLOCK();
1248 } 1248 }
1249 1249
1250 /* 1250 /*
1251 * acquire transaction lock on on-disk inode page 1251 * acquire transaction lock on on-disk inode page
1252 * (become first tlock of the tblk's tlock list) 1252 * (become first tlock of the tblk's tlock list)
1253 */ 1253 */
1254 if (((rc = diWrite(tid, ip)))) 1254 if (((rc = diWrite(tid, ip))))
1255 goto out; 1255 goto out;
1256 } 1256 }
1257 1257
1258 /* 1258 /*
1259 * write log records from transaction locks 1259 * write log records from transaction locks
1260 * 1260 *
1261 * txUpdateMap() resets XAD_NEW in XAD. 1261 * txUpdateMap() resets XAD_NEW in XAD.
1262 */ 1262 */
1263 if ((rc = txLog(log, tblk, &cd))) 1263 if ((rc = txLog(log, tblk, &cd)))
1264 goto TheEnd; 1264 goto TheEnd;
1265 1265
1266 /* 1266 /*
1267 * Ensure that inode isn't reused before 1267 * Ensure that inode isn't reused before
1268 * lazy commit thread finishes processing 1268 * lazy commit thread finishes processing
1269 */ 1269 */
1270 if (tblk->xflag & COMMIT_DELETE) { 1270 if (tblk->xflag & COMMIT_DELETE) {
1271 atomic_inc(&tblk->u.ip->i_count); 1271 atomic_inc(&tblk->u.ip->i_count);
1272 /* 1272 /*
1273 * Avoid a rare deadlock 1273 * Avoid a rare deadlock
1274 * 1274 *
1275 * If the inode is locked, we may be blocked in 1275 * If the inode is locked, we may be blocked in
1276 * jfs_commit_inode. If so, we don't want the 1276 * jfs_commit_inode. If so, we don't want the
1277 * lazy_commit thread doing the last iput() on the inode 1277 * lazy_commit thread doing the last iput() on the inode
1278 * since that may block on the locked inode. Instead, 1278 * since that may block on the locked inode. Instead,
1279 * commit the transaction synchronously, so the last iput 1279 * commit the transaction synchronously, so the last iput
1280 * will be done by the calling thread (or later) 1280 * will be done by the calling thread (or later)
1281 */ 1281 */
1282 if (tblk->u.ip->i_state & I_LOCK) 1282 if (tblk->u.ip->i_state & I_LOCK)
1283 tblk->xflag &= ~COMMIT_LAZY; 1283 tblk->xflag &= ~COMMIT_LAZY;
1284 } 1284 }
1285 1285
1286 ASSERT((!(tblk->xflag & COMMIT_DELETE)) || 1286 ASSERT((!(tblk->xflag & COMMIT_DELETE)) ||
1287 ((tblk->u.ip->i_nlink == 0) && 1287 ((tblk->u.ip->i_nlink == 0) &&
1288 !test_cflag(COMMIT_Nolink, tblk->u.ip))); 1288 !test_cflag(COMMIT_Nolink, tblk->u.ip)));
1289 1289
1290 /* 1290 /*
1291 * write COMMIT log record 1291 * write COMMIT log record
1292 */ 1292 */
1293 lrd->type = cpu_to_le16(LOG_COMMIT); 1293 lrd->type = cpu_to_le16(LOG_COMMIT);
1294 lrd->length = 0; 1294 lrd->length = 0;
1295 lsn = lmLog(log, tblk, lrd, NULL); 1295 lsn = lmLog(log, tblk, lrd, NULL);
1296 1296
1297 lmGroupCommit(log, tblk); 1297 lmGroupCommit(log, tblk);
1298 1298
1299 /* 1299 /*
1300 * - transaction is now committed - 1300 * - transaction is now committed -
1301 */ 1301 */
1302 1302
1303 /* 1303 /*
1304 * force pages in careful update 1304 * force pages in careful update
1305 * (imap addressing structure update) 1305 * (imap addressing structure update)
1306 */ 1306 */
1307 if (flag & COMMIT_FORCE) 1307 if (flag & COMMIT_FORCE)
1308 txForce(tblk); 1308 txForce(tblk);
1309 1309
1310 /* 1310 /*
1311 * update allocation map. 1311 * update allocation map.
1312 * 1312 *
1313 * update inode allocation map and inode: 1313 * update inode allocation map and inode:
1314 * free pager lock on memory object of inode if any. 1314 * free pager lock on memory object of inode if any.
1315 * update block allocation map. 1315 * update block allocation map.
1316 * 1316 *
1317 * txUpdateMap() resets XAD_NEW in XAD. 1317 * txUpdateMap() resets XAD_NEW in XAD.
1318 */ 1318 */
1319 if (tblk->xflag & COMMIT_FORCE) 1319 if (tblk->xflag & COMMIT_FORCE)
1320 txUpdateMap(tblk); 1320 txUpdateMap(tblk);
1321 1321
1322 /* 1322 /*
1323 * free transaction locks and pageout/free pages 1323 * free transaction locks and pageout/free pages
1324 */ 1324 */
1325 txRelease(tblk); 1325 txRelease(tblk);
1326 1326
1327 if ((tblk->flag & tblkGC_LAZY) == 0) 1327 if ((tblk->flag & tblkGC_LAZY) == 0)
1328 txUnlock(tblk); 1328 txUnlock(tblk);
1329 1329
1330 1330
1331 /* 1331 /*
1332 * reset in-memory object state 1332 * reset in-memory object state
1333 */ 1333 */
1334 for (k = 0; k < cd.nip; k++) { 1334 for (k = 0; k < cd.nip; k++) {
1335 ip = cd.iplist[k]; 1335 ip = cd.iplist[k];
1336 jfs_ip = JFS_IP(ip); 1336 jfs_ip = JFS_IP(ip);
1337 1337
1338 /* 1338 /*
1339 * reset in-memory inode state 1339 * reset in-memory inode state
1340 */ 1340 */
1341 jfs_ip->bxflag = 0; 1341 jfs_ip->bxflag = 0;
1342 jfs_ip->blid = 0; 1342 jfs_ip->blid = 0;
1343 } 1343 }
1344 1344
1345 out: 1345 out:
1346 if (rc != 0) 1346 if (rc != 0)
1347 txAbort(tid, 1); 1347 txAbort(tid, 1);
1348 1348
1349 TheEnd: 1349 TheEnd:
1350 jfs_info("txCommit: tid = %d, returning %d", tid, rc); 1350 jfs_info("txCommit: tid = %d, returning %d", tid, rc);
1351 return rc; 1351 return rc;
1352 } 1352 }
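
For context, a typical caller drives this interface roughly as follows. This is a hedged sketch assuming the usual txBegin()/txEnd() pairing used by the transaction manager, not code from this commit; `ip` stands for some inode already modified under transaction locks:

	/* sketch: commit changes to a single inode */
	tid_t tid;
	struct inode *iplist[1];
	int rc;

	tid = txBegin(ip->i_sb, 0);		/* start a transaction */
	/* ... modify the inode, acquiring tlocks as needed ... */
	iplist[0] = ip;
	rc = txCommit(tid, 1, iplist, 0);	/* flag 0 permits a lazy group commit */
	txEnd(tid);				/* release the transaction block */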
1353 1353
1354 /* 1354 /*
1355 * NAME: txLog() 1355 * NAME: txLog()
1356 * 1356 *
1357 * FUNCTION: Writes AFTER log records for all lines modified 1357 * FUNCTION: Writes AFTER log records for all lines modified
1358 * by tid for segments specified by inodes in comdata. 1358 * by tid for segments specified by inodes in comdata.
1359 * Code assumes only WRITELOCKS are recorded in lockwords. 1359 * Code assumes only WRITELOCKS are recorded in lockwords.
1360 * 1360 *
1361 * PARAMETERS: 1361 * PARAMETERS:
1362 * 1362 *
1363 * RETURN : 1363 * RETURN :
1364 */ 1364 */
1365 static int txLog(struct jfs_log * log, struct tblock * tblk, struct commit * cd) 1365 static int txLog(struct jfs_log * log, struct tblock * tblk, struct commit * cd)
1366 { 1366 {
1367 int rc = 0; 1367 int rc = 0;
1368 struct inode *ip; 1368 struct inode *ip;
1369 lid_t lid; 1369 lid_t lid;
1370 struct tlock *tlck; 1370 struct tlock *tlck;
1371 struct lrd *lrd = &cd->lrd; 1371 struct lrd *lrd = &cd->lrd;
1372 1372
1373 /* 1373 /*
1374 * write log record(s) for each tlock of transaction, 1374 * write log record(s) for each tlock of transaction,
1375 */ 1375 */
1376 for (lid = tblk->next; lid; lid = tlck->next) { 1376 for (lid = tblk->next; lid; lid = tlck->next) {
1377 tlck = lid_to_tlock(lid); 1377 tlck = lid_to_tlock(lid);
1378 1378
1379 tlck->flag |= tlckLOG; 1379 tlck->flag |= tlckLOG;
1380 1380
1381 /* initialize lrd common */ 1381 /* initialize lrd common */
1382 ip = tlck->ip; 1382 ip = tlck->ip;
1383 lrd->aggregate = cpu_to_le32(JFS_SBI(ip->i_sb)->aggregate); 1383 lrd->aggregate = cpu_to_le32(JFS_SBI(ip->i_sb)->aggregate);
1384 lrd->log.redopage.fileset = cpu_to_le32(JFS_IP(ip)->fileset); 1384 lrd->log.redopage.fileset = cpu_to_le32(JFS_IP(ip)->fileset);
1385 lrd->log.redopage.inode = cpu_to_le32(ip->i_ino); 1385 lrd->log.redopage.inode = cpu_to_le32(ip->i_ino);
1386 1386
1387 /* write log record of page from the tlock */ 1387 /* write log record of page from the tlock */
1388 switch (tlck->type & tlckTYPE) { 1388 switch (tlck->type & tlckTYPE) {
1389 case tlckXTREE: 1389 case tlckXTREE:
1390 xtLog(log, tblk, lrd, tlck); 1390 xtLog(log, tblk, lrd, tlck);
1391 break; 1391 break;
1392 1392
1393 case tlckDTREE: 1393 case tlckDTREE:
1394 dtLog(log, tblk, lrd, tlck); 1394 dtLog(log, tblk, lrd, tlck);
1395 break; 1395 break;
1396 1396
1397 case tlckINODE: 1397 case tlckINODE:
1398 diLog(log, tblk, lrd, tlck, cd); 1398 diLog(log, tblk, lrd, tlck, cd);
1399 break; 1399 break;
1400 1400
1401 case tlckMAP: 1401 case tlckMAP:
1402 mapLog(log, tblk, lrd, tlck); 1402 mapLog(log, tblk, lrd, tlck);
1403 break; 1403 break;
1404 1404
1405 case tlckDATA: 1405 case tlckDATA:
1406 dataLog(log, tblk, lrd, tlck); 1406 dataLog(log, tblk, lrd, tlck);
1407 break; 1407 break;
1408 1408
1409 default: 1409 default:
1410 jfs_err("UFO tlock:0x%p", tlck); 1410 jfs_err("UFO tlock:0x%p", tlck);
1411 } 1411 }
1412 } 1412 }
1413 1413
1414 return rc; 1414 return rc;
1415 } 1415 }
1416 1416
1417 /* 1417 /*
1418 * diLog() 1418 * diLog()
1419 * 1419 *
1420 * function: log inode tlock and format maplock to update bmap; 1420 * function: log inode tlock and format maplock to update bmap;
1421 */ 1421 */
1422 static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, 1422 static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1423 struct tlock * tlck, struct commit * cd) 1423 struct tlock * tlck, struct commit * cd)
1424 { 1424 {
1425 int rc = 0; 1425 int rc = 0;
1426 struct metapage *mp; 1426 struct metapage *mp;
1427 pxd_t *pxd; 1427 pxd_t *pxd;
1428 struct pxd_lock *pxdlock; 1428 struct pxd_lock *pxdlock;
1429 1429
1430 mp = tlck->mp; 1430 mp = tlck->mp;
1431 1431
1432 /* initialize as REDOPAGE record format */ 1432 /* initialize as REDOPAGE record format */
1433 lrd->log.redopage.type = cpu_to_le16(LOG_INODE); 1433 lrd->log.redopage.type = cpu_to_le16(LOG_INODE);
1434 lrd->log.redopage.l2linesize = cpu_to_le16(L2INODESLOTSIZE); 1434 lrd->log.redopage.l2linesize = cpu_to_le16(L2INODESLOTSIZE);
1435 1435
1436 pxd = &lrd->log.redopage.pxd; 1436 pxd = &lrd->log.redopage.pxd;
1437 1437
1438 /* 1438 /*
1439 * inode after image 1439 * inode after image
1440 */ 1440 */
1441 if (tlck->type & tlckENTRY) { 1441 if (tlck->type & tlckENTRY) {
1442 /* log after-image for logredo(): */ 1442 /* log after-image for logredo(): */
1443 lrd->type = cpu_to_le16(LOG_REDOPAGE); 1443 lrd->type = cpu_to_le16(LOG_REDOPAGE);
1444 PXDaddress(pxd, mp->index); 1444 PXDaddress(pxd, mp->index);
1445 PXDlength(pxd, 1445 PXDlength(pxd,
1446 mp->logical_size >> tblk->sb->s_blocksize_bits); 1446 mp->logical_size >> tblk->sb->s_blocksize_bits);
1447 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); 1447 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1448 1448
1449 /* mark page as homeward bound */ 1449 /* mark page as homeward bound */
1450 tlck->flag |= tlckWRITEPAGE; 1450 tlck->flag |= tlckWRITEPAGE;
1451 } else if (tlck->type & tlckFREE) { 1451 } else if (tlck->type & tlckFREE) {
1452 /* 1452 /*
1453 * free inode extent 1453 * free inode extent
1454 * 1454 *
1455 * (pages of the freed inode extent have been invalidated and 1455 * (pages of the freed inode extent have been invalidated and
1456 * a maplock for free of the extent has been formatted at 1456 * a maplock for free of the extent has been formatted at
1457 * txLock() time); 1457 * txLock() time);
1458 * 1458 *
1459 * the tlock had been acquired on the inode allocation map page 1459 * the tlock had been acquired on the inode allocation map page
1460 * (iag) that specifies the freed extent, even though the map 1460 * (iag) that specifies the freed extent, even though the map
1461 * page is not itself logged, to prevent pageout of the map 1461 * page is not itself logged, to prevent pageout of the map
1462 * page before the log; 1462 * page before the log;
1463 */ 1463 */
1464 1464
1465 /* log LOG_NOREDOINOEXT of the freed inode extent for 1465 /* log LOG_NOREDOINOEXT of the freed inode extent for
1466 * logredo() to start NoRedoPage filters, and to update 1466 * logredo() to start NoRedoPage filters, and to update
1467 * imap and bmap for free of the extent; 1467 * imap and bmap for free of the extent;
1468 */ 1468 */
1469 lrd->type = cpu_to_le16(LOG_NOREDOINOEXT); 1469 lrd->type = cpu_to_le16(LOG_NOREDOINOEXT);
1470 /* 1470 /*
1471 * For the LOG_NOREDOINOEXT record, we need 1471 * For the LOG_NOREDOINOEXT record, we need
1472 * to pass the IAG number and inode extent 1472 * to pass the IAG number and inode extent
1473 * index (within that IAG) from which the 1473 * index (within that IAG) from which the
1474 * extent is being released. These have been 1474 * extent is being released. These have been
1475 * passed to us in the iplist[1] and iplist[2]. 1475 * passed to us in the iplist[1] and iplist[2].
1476 */ 1476 */
1477 lrd->log.noredoinoext.iagnum = 1477 lrd->log.noredoinoext.iagnum =
1478 cpu_to_le32((u32) (size_t) cd->iplist[1]); 1478 cpu_to_le32((u32) (size_t) cd->iplist[1]);
1479 lrd->log.noredoinoext.inoext_idx = 1479 lrd->log.noredoinoext.inoext_idx =
1480 cpu_to_le32((u32) (size_t) cd->iplist[2]); 1480 cpu_to_le32((u32) (size_t) cd->iplist[2]);
1481 1481
1482 pxdlock = (struct pxd_lock *) & tlck->lock; 1482 pxdlock = (struct pxd_lock *) & tlck->lock;
1483 *pxd = pxdlock->pxd; 1483 *pxd = pxdlock->pxd;
1484 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); 1484 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1485 1485
1486 /* update bmap */ 1486 /* update bmap */
1487 tlck->flag |= tlckUPDATEMAP; 1487 tlck->flag |= tlckUPDATEMAP;
1488 1488
1489 /* mark page as homeward bound */ 1489 /* mark page as homeward bound */
1490 tlck->flag |= tlckWRITEPAGE; 1490 tlck->flag |= tlckWRITEPAGE;
1491 } else 1491 } else
1492 jfs_err("diLog: UFO type tlck:0x%p", tlck); 1492 jfs_err("diLog: UFO type tlck:0x%p", tlck);
1493 #ifdef _JFS_WIP 1493 #ifdef _JFS_WIP
1494 /* 1494 /*
1495 * alloc/free external EA extent 1495 * alloc/free external EA extent
1496 * 1496 *
1497 * a maplock for txUpdateMap() to update bPWMAP for alloc/free 1497 * a maplock for txUpdateMap() to update bPWMAP for alloc/free
1498 * of the extent has been formatted at txLock() time; 1498 * of the extent has been formatted at txLock() time;
1499 */ 1499 */
1500 else { 1500 else {
1501 assert(tlck->type & tlckEA); 1501 assert(tlck->type & tlckEA);
1502 1502
1503 /* log LOG_UPDATEMAP for logredo() to update bmap for 1503 /* log LOG_UPDATEMAP for logredo() to update bmap for
1504 * alloc of new (and free of old) external EA extent; 1504 * alloc of new (and free of old) external EA extent;
1505 */ 1505 */
1506 lrd->type = cpu_to_le16(LOG_UPDATEMAP); 1506 lrd->type = cpu_to_le16(LOG_UPDATEMAP);
1507 pxdlock = (struct pxd_lock *) & tlck->lock; 1507 pxdlock = (struct pxd_lock *) & tlck->lock;
1508 nlock = pxdlock->index; 1508 nlock = pxdlock->index;
1509 for (i = 0; i < nlock; i++, pxdlock++) { 1509 for (i = 0; i < nlock; i++, pxdlock++) {
1510 if (pxdlock->flag & mlckALLOCPXD) 1510 if (pxdlock->flag & mlckALLOCPXD)
1511 lrd->log.updatemap.type = 1511 lrd->log.updatemap.type =
1512 cpu_to_le16(LOG_ALLOCPXD); 1512 cpu_to_le16(LOG_ALLOCPXD);
1513 else 1513 else
1514 lrd->log.updatemap.type = 1514 lrd->log.updatemap.type =
1515 cpu_to_le16(LOG_FREEPXD); 1515 cpu_to_le16(LOG_FREEPXD);
1516 lrd->log.updatemap.nxd = cpu_to_le16(1); 1516 lrd->log.updatemap.nxd = cpu_to_le16(1);
1517 lrd->log.updatemap.pxd = pxdlock->pxd; 1517 lrd->log.updatemap.pxd = pxdlock->pxd;
1518 lrd->backchain = 1518 lrd->backchain =
1519 cpu_to_le32(lmLog(log, tblk, lrd, NULL)); 1519 cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1520 } 1520 }
1521 1521
1522 /* update bmap */ 1522 /* update bmap */
1523 tlck->flag |= tlckUPDATEMAP; 1523 tlck->flag |= tlckUPDATEMAP;
1524 } 1524 }
1525 #endif /* _JFS_WIP */ 1525 #endif /* _JFS_WIP */
1526 1526
1527 return rc; 1527 return rc;
1528 } 1528 }
1529 1529
1530 /* 1530 /*
1531 * dataLog() 1531 * dataLog()
1532 * 1532 *
1533 * function: log data tlock 1533 * function: log data tlock
1534 */ 1534 */
1535 static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, 1535 static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1536 struct tlock * tlck) 1536 struct tlock * tlck)
1537 { 1537 {
1538 struct metapage *mp; 1538 struct metapage *mp;
1539 pxd_t *pxd; 1539 pxd_t *pxd;
1540 1540
1541 mp = tlck->mp; 1541 mp = tlck->mp;
1542 1542
1543 /* initialize as REDOPAGE record format */ 1543 /* initialize as REDOPAGE record format */
1544 lrd->log.redopage.type = cpu_to_le16(LOG_DATA); 1544 lrd->log.redopage.type = cpu_to_le16(LOG_DATA);
1545 lrd->log.redopage.l2linesize = cpu_to_le16(L2DATASLOTSIZE); 1545 lrd->log.redopage.l2linesize = cpu_to_le16(L2DATASLOTSIZE);
1546 1546
1547 pxd = &lrd->log.redopage.pxd; 1547 pxd = &lrd->log.redopage.pxd;
1548 1548
1549 /* log after-image for logredo(): */ 1549 /* log after-image for logredo(): */
1550 lrd->type = cpu_to_le16(LOG_REDOPAGE); 1550 lrd->type = cpu_to_le16(LOG_REDOPAGE);
1551 1551
1552 if (jfs_dirtable_inline(tlck->ip)) { 1552 if (jfs_dirtable_inline(tlck->ip)) {
1553 /* 1553 /*
1554 * The table has been truncated, we must have deleted 1554 * The table has been truncated, we must have deleted
1555 * the last entry, so don't bother logging this 1555 * the last entry, so don't bother logging this
1556 */ 1556 */
1557 mp->lid = 0; 1557 mp->lid = 0;
1558 grab_metapage(mp); 1558 grab_metapage(mp);
1559 metapage_homeok(mp); 1559 metapage_homeok(mp);
1560 discard_metapage(mp); 1560 discard_metapage(mp);
1561 tlck->mp = NULL; 1561 tlck->mp = NULL;
1562 return 0; 1562 return 0;
1563 } 1563 }
1564 1564
1565 PXDaddress(pxd, mp->index); 1565 PXDaddress(pxd, mp->index);
1566 PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits); 1566 PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits);
1567 1567
1568 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); 1568 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1569 1569
1570 /* mark page as homeward bound */ 1570 /* mark page as homeward bound */
1571 tlck->flag |= tlckWRITEPAGE; 1571 tlck->flag |= tlckWRITEPAGE;
1572 1572
1573 return 0; 1573 return 0;
1574 } 1574 }
1575 1575
1576 /* 1576 /*
1577 * dtLog() 1577 * dtLog()
1578 * 1578 *
1579 * function: log dtree tlock and format maplock to update bmap; 1579 * function: log dtree tlock and format maplock to update bmap;
1580 */ 1580 */
1581 static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, 1581 static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1582 struct tlock * tlck) 1582 struct tlock * tlck)
1583 { 1583 {
1584 struct metapage *mp; 1584 struct metapage *mp;
1585 struct pxd_lock *pxdlock; 1585 struct pxd_lock *pxdlock;
1586 pxd_t *pxd; 1586 pxd_t *pxd;
1587 1587
1588 mp = tlck->mp; 1588 mp = tlck->mp;
1589 1589
1590 /* initialize as REDOPAGE/NOREDOPAGE record format */ 1590 /* initialize as REDOPAGE/NOREDOPAGE record format */
1591 lrd->log.redopage.type = cpu_to_le16(LOG_DTREE); 1591 lrd->log.redopage.type = cpu_to_le16(LOG_DTREE);
1592 lrd->log.redopage.l2linesize = cpu_to_le16(L2DTSLOTSIZE); 1592 lrd->log.redopage.l2linesize = cpu_to_le16(L2DTSLOTSIZE);
1593 1593
1594 pxd = &lrd->log.redopage.pxd; 1594 pxd = &lrd->log.redopage.pxd;
1595 1595
1596 if (tlck->type & tlckBTROOT) 1596 if (tlck->type & tlckBTROOT)
1597 lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT); 1597 lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT);
1598 1598
1599 /* 1599 /*
1600 * page extension via relocation: entry insertion; 1600 * page extension via relocation: entry insertion;
1601 * page extension in-place: entry insertion; 1601 * page extension in-place: entry insertion;
1602 * new right page from page split, reinitialized in-line 1602 * new right page from page split, reinitialized in-line
1603 * root from root page split: entry insertion; 1603 * root from root page split: entry insertion;
1604 */ 1604 */
1605 if (tlck->type & (tlckNEW | tlckEXTEND)) { 1605 if (tlck->type & (tlckNEW | tlckEXTEND)) {
1606 /* log after-image of the new page for logredo(): 1606 /* log after-image of the new page for logredo():
1607 * mark log (LOG_NEW) for logredo() to initialize 1607 * mark log (LOG_NEW) for logredo() to initialize
1608 * freelist and update bmap for alloc of the new page; 1608 * freelist and update bmap for alloc of the new page;
1609 */ 1609 */
1610 lrd->type = cpu_to_le16(LOG_REDOPAGE); 1610 lrd->type = cpu_to_le16(LOG_REDOPAGE);
1611 if (tlck->type & tlckEXTEND) 1611 if (tlck->type & tlckEXTEND)
1612 lrd->log.redopage.type |= cpu_to_le16(LOG_EXTEND); 1612 lrd->log.redopage.type |= cpu_to_le16(LOG_EXTEND);
1613 else 1613 else
1614 lrd->log.redopage.type |= cpu_to_le16(LOG_NEW); 1614 lrd->log.redopage.type |= cpu_to_le16(LOG_NEW);
1615 PXDaddress(pxd, mp->index); 1615 PXDaddress(pxd, mp->index);
1616 PXDlength(pxd, 1616 PXDlength(pxd,
1617 mp->logical_size >> tblk->sb->s_blocksize_bits); 1617 mp->logical_size >> tblk->sb->s_blocksize_bits);
1618 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); 1618 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1619 1619
1620 /* format a maplock for txUpdateMap() to update bPMAP for 1620 /* format a maplock for txUpdateMap() to update bPMAP for
1621 * alloc of the new page; 1621 * alloc of the new page;
1622 */ 1622 */
1623 if (tlck->type & tlckBTROOT) 1623 if (tlck->type & tlckBTROOT)
1624 return; 1624 return;
1625 tlck->flag |= tlckUPDATEMAP; 1625 tlck->flag |= tlckUPDATEMAP;
1626 pxdlock = (struct pxd_lock *) & tlck->lock; 1626 pxdlock = (struct pxd_lock *) & tlck->lock;
1627 pxdlock->flag = mlckALLOCPXD; 1627 pxdlock->flag = mlckALLOCPXD;
1628 pxdlock->pxd = *pxd; 1628 pxdlock->pxd = *pxd;
1629 1629
1630 pxdlock->index = 1; 1630 pxdlock->index = 1;
1631 1631
1632 /* mark page as homeward bound */ 1632 /* mark page as homeward bound */
1633 tlck->flag |= tlckWRITEPAGE; 1633 tlck->flag |= tlckWRITEPAGE;
1634 return; 1634 return;
1635 } 1635 }
1636 1636
1637 /* 1637 /*
1638 * entry insertion/deletion, 1638 * entry insertion/deletion,
1639 * sibling page link update (old right page before split); 1639 * sibling page link update (old right page before split);
1640 */ 1640 */
1641 if (tlck->type & (tlckENTRY | tlckRELINK)) { 1641 if (tlck->type & (tlckENTRY | tlckRELINK)) {
1642 /* log after-image for logredo(): */ 1642 /* log after-image for logredo(): */
1643 lrd->type = cpu_to_le16(LOG_REDOPAGE); 1643 lrd->type = cpu_to_le16(LOG_REDOPAGE);
1644 PXDaddress(pxd, mp->index); 1644 PXDaddress(pxd, mp->index);
1645 PXDlength(pxd, 1645 PXDlength(pxd,
1646 mp->logical_size >> tblk->sb->s_blocksize_bits); 1646 mp->logical_size >> tblk->sb->s_blocksize_bits);
1647 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); 1647 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1648 1648
1649 /* mark page as homeward bound */ 1649 /* mark page as homeward bound */
1650 tlck->flag |= tlckWRITEPAGE; 1650 tlck->flag |= tlckWRITEPAGE;
1651 return; 1651 return;
1652 } 1652 }
1653 1653
1654 /* 1654 /*
1655 * page deletion: page has been invalidated 1655 * page deletion: page has been invalidated
1656 * page relocation: source extent 1656 * page relocation: source extent
1657 * 1657 *
1658 * a maplock for free of the page has been formatted 1658 * a maplock for free of the page has been formatted
1659 * at txLock() time); 1659 * at txLock() time);
1660 */ 1660 */
1661 if (tlck->type & (tlckFREE | tlckRELOCATE)) { 1661 if (tlck->type & (tlckFREE | tlckRELOCATE)) {
1662 /* log LOG_NOREDOPAGE of the deleted page for logredo() 1662 /* log LOG_NOREDOPAGE of the deleted page for logredo()
1663 * to start NoRedoPage filter and to update bmap for free 1663 * to start NoRedoPage filter and to update bmap for free
1664 * of the deleted page 1664 * of the deleted page
1665 */ 1665 */
1666 lrd->type = cpu_to_le16(LOG_NOREDOPAGE); 1666 lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
1667 pxdlock = (struct pxd_lock *) & tlck->lock; 1667 pxdlock = (struct pxd_lock *) & tlck->lock;
1668 *pxd = pxdlock->pxd; 1668 *pxd = pxdlock->pxd;
1669 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); 1669 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1670 1670
1671 /* a maplock for txUpdateMap() for free of the page 1671 /* a maplock for txUpdateMap() for free of the page
1672 * has been formatted at txLock() time; 1672 * has been formatted at txLock() time;
1673 */ 1673 */
1674 tlck->flag |= tlckUPDATEMAP; 1674 tlck->flag |= tlckUPDATEMAP;
1675 } 1675 }
1676 return; 1676 return;
1677 } 1677 }
1678 1678
1679 /* 1679 /*
1680 * xtLog() 1680 * xtLog()
1681 * 1681 *
1682 * function: log xtree tlock and format maplock to update bmap; 1682 * function: log xtree tlock and format maplock to update bmap;
1683 */ 1683 */
1684 static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, 1684 static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1685 struct tlock * tlck) 1685 struct tlock * tlck)
1686 { 1686 {
1687 struct inode *ip; 1687 struct inode *ip;
1688 struct metapage *mp; 1688 struct metapage *mp;
1689 xtpage_t *p; 1689 xtpage_t *p;
1690 struct xtlock *xtlck; 1690 struct xtlock *xtlck;
1691 struct maplock *maplock; 1691 struct maplock *maplock;
1692 struct xdlistlock *xadlock; 1692 struct xdlistlock *xadlock;
1693 struct pxd_lock *pxdlock; 1693 struct pxd_lock *pxdlock;
1694 pxd_t *page_pxd; 1694 pxd_t *page_pxd;
1695 int next, lwm, hwm; 1695 int next, lwm, hwm;
1696 1696
1697 ip = tlck->ip; 1697 ip = tlck->ip;
1698 mp = tlck->mp; 1698 mp = tlck->mp;
1699 1699
1700 /* initialize as REDOPAGE/NOREDOPAGE record format */ 1700 /* initialize as REDOPAGE/NOREDOPAGE record format */
1701 lrd->log.redopage.type = cpu_to_le16(LOG_XTREE); 1701 lrd->log.redopage.type = cpu_to_le16(LOG_XTREE);
1702 lrd->log.redopage.l2linesize = cpu_to_le16(L2XTSLOTSIZE); 1702 lrd->log.redopage.l2linesize = cpu_to_le16(L2XTSLOTSIZE);
1703 1703
1704 page_pxd = &lrd->log.redopage.pxd; 1704 page_pxd = &lrd->log.redopage.pxd;
1705 1705
1706 if (tlck->type & tlckBTROOT) { 1706 if (tlck->type & tlckBTROOT) {
1707 lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT); 1707 lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT);
1708 p = &JFS_IP(ip)->i_xtroot; 1708 p = &JFS_IP(ip)->i_xtroot;
1709 if (S_ISDIR(ip->i_mode)) 1709 if (S_ISDIR(ip->i_mode))
1710 lrd->log.redopage.type |= 1710 lrd->log.redopage.type |=
1711 cpu_to_le16(LOG_DIR_XTREE); 1711 cpu_to_le16(LOG_DIR_XTREE);
1712 } else 1712 } else
1713 p = (xtpage_t *) mp->data; 1713 p = (xtpage_t *) mp->data;
1714 next = le16_to_cpu(p->header.nextindex); 1714 next = le16_to_cpu(p->header.nextindex);
1715 1715
1716 xtlck = (struct xtlock *) & tlck->lock; 1716 xtlck = (struct xtlock *) & tlck->lock;
1717 1717
1718 maplock = (struct maplock *) & tlck->lock; 1718 maplock = (struct maplock *) & tlck->lock;
1719 xadlock = (struct xdlistlock *) maplock; 1719 xadlock = (struct xdlistlock *) maplock;
1720 1720
1721 /* 1721 /*
1722 * entry insertion/extension; 1722 * entry insertion/extension;
1723 * sibling page link update (old right page before split); 1723 * sibling page link update (old right page before split);
1724 */ 1724 */
1725 if (tlck->type & (tlckNEW | tlckGROW | tlckRELINK)) { 1725 if (tlck->type & (tlckNEW | tlckGROW | tlckRELINK)) {
1726 /* log after-image for logredo(): 1726 /* log after-image for logredo():
1727 * logredo() will update bmap for alloc of new/extended 1727 * logredo() will update bmap for alloc of new/extended
1728 * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from 1728 * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from
1729 * after-image of XADlist; 1729 * after-image of XADlist;
1730 * logredo() resets (XAD_NEW|XAD_EXTEND) flag when 1730 * logredo() resets (XAD_NEW|XAD_EXTEND) flag when
1731 * applying the after-image to the meta-data page. 1731 * applying the after-image to the meta-data page.
1732 */ 1732 */
1733 lrd->type = cpu_to_le16(LOG_REDOPAGE); 1733 lrd->type = cpu_to_le16(LOG_REDOPAGE);
1734 PXDaddress(page_pxd, mp->index); 1734 PXDaddress(page_pxd, mp->index);
1735 PXDlength(page_pxd, 1735 PXDlength(page_pxd,
1736 mp->logical_size >> tblk->sb->s_blocksize_bits); 1736 mp->logical_size >> tblk->sb->s_blocksize_bits);
1737 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); 1737 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1738 1738
1739 /* format a maplock for txUpdateMap() to update bPMAP 1739 /* format a maplock for txUpdateMap() to update bPMAP
1740 * for alloc of new/extended extents of XAD[lwm:next) 1740 * for alloc of new/extended extents of XAD[lwm:next)
1741 * from the page itself; 1741 * from the page itself;
1742 * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag. 1742 * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag.
1743 */ 1743 */
1744 lwm = xtlck->lwm.offset; 1744 lwm = xtlck->lwm.offset;
1745 if (lwm == 0) 1745 if (lwm == 0)
1746 lwm = XTPAGEMAXSLOT; 1746 lwm = XTPAGEMAXSLOT;
1747 1747
1748 if (lwm == next) 1748 if (lwm == next)
1749 goto out; 1749 goto out;
1750 if (lwm > next) { 1750 if (lwm > next) {
1751 jfs_err("xtLog: lwm > next\n"); 1751 jfs_err("xtLog: lwm > next\n");
1752 goto out; 1752 goto out;
1753 } 1753 }
1754 tlck->flag |= tlckUPDATEMAP; 1754 tlck->flag |= tlckUPDATEMAP;
1755 xadlock->flag = mlckALLOCXADLIST; 1755 xadlock->flag = mlckALLOCXADLIST;
1756 xadlock->count = next - lwm; 1756 xadlock->count = next - lwm;
1757 if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) { 1757 if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) {
1758 int i; 1758 int i;
1759 pxd_t *pxd; 1759 pxd_t *pxd;
1760 /* 1760 /*
1761 * Lazy commit may allow xtree to be modified before 1761 * Lazy commit may allow xtree to be modified before
1762 * txUpdateMap runs. Copy xad into linelock to 1762 * txUpdateMap runs. Copy xad into linelock to
1763 * preserve correct data. 1763 * preserve correct data.
1764 * 1764 *
1765 * We can fit twice as many pxd's as xads in the lock 1765 * We can fit twice as many pxd's as xads in the lock
1766 */ 1766 */
1767 xadlock->flag = mlckALLOCPXDLIST; 1767 xadlock->flag = mlckALLOCPXDLIST;
1768 pxd = xadlock->xdlist = &xtlck->pxdlock; 1768 pxd = xadlock->xdlist = &xtlck->pxdlock;
1769 for (i = 0; i < xadlock->count; i++) { 1769 for (i = 0; i < xadlock->count; i++) {
1770 PXDaddress(pxd, addressXAD(&p->xad[lwm + i])); 1770 PXDaddress(pxd, addressXAD(&p->xad[lwm + i]));
1771 PXDlength(pxd, lengthXAD(&p->xad[lwm + i])); 1771 PXDlength(pxd, lengthXAD(&p->xad[lwm + i]));
1772 p->xad[lwm + i].flag &= 1772 p->xad[lwm + i].flag &=
1773 ~(XAD_NEW | XAD_EXTENDED); 1773 ~(XAD_NEW | XAD_EXTENDED);
1774 pxd++; 1774 pxd++;
1775 } 1775 }
1776 } else { 1776 } else {
1777 /* 1777 /*
1778 * xdlist will point into the inode's xtree; ensure 1778 * xdlist will point into the inode's xtree; ensure
1779 * that transaction is not committed lazily. 1779 * that transaction is not committed lazily.
1780 */ 1780 */
1781 xadlock->flag = mlckALLOCXADLIST; 1781 xadlock->flag = mlckALLOCXADLIST;
1782 xadlock->xdlist = &p->xad[lwm]; 1782 xadlock->xdlist = &p->xad[lwm];
1783 tblk->xflag &= ~COMMIT_LAZY; 1783 tblk->xflag &= ~COMMIT_LAZY;
1784 } 1784 }
1785 jfs_info("xtLog: alloc ip:0x%p mp:0x%p tlck:0x%p lwm:%d " 1785 jfs_info("xtLog: alloc ip:0x%p mp:0x%p tlck:0x%p lwm:%d "
1786 "count:%d", tlck->ip, mp, tlck, lwm, xadlock->count); 1786 "count:%d", tlck->ip, mp, tlck, lwm, xadlock->count);
1787 1787
1788 maplock->index = 1; 1788 maplock->index = 1;
1789 1789
1790 out: 1790 out:
1791 /* mark page as homeward bound */ 1791 /* mark page as homeward bound */
1792 tlck->flag |= tlckWRITEPAGE; 1792 tlck->flag |= tlckWRITEPAGE;
1793 1793
1794 return; 1794 return;
1795 } 1795 }
1796 1796
1797 /* 1797 /*
1798 * page deletion: file deletion/truncation (ref. xtTruncate()) 1798 * page deletion: file deletion/truncation (ref. xtTruncate())
1799 * 1799 *
1800 * (page will be invalidated after log is written and bmap 1800 * (page will be invalidated after log is written and bmap
1801 * is updated from the page); 1801 * is updated from the page);
1802 */ 1802 */
1803 if (tlck->type & tlckFREE) { 1803 if (tlck->type & tlckFREE) {
1804 /* LOG_NOREDOPAGE log for NoRedoPage filter: 1804 /* LOG_NOREDOPAGE log for NoRedoPage filter:
1805 * if page free from file delete, NoRedoFile filter from 1805 * if page free from file delete, NoRedoFile filter from
1806 * inode image of zero link count will subsume NoRedoPage 1806 * inode image of zero link count will subsume NoRedoPage
1807 * filters for each page; 1807 * filters for each page;
1808 * if page free from file truncation, write NoRedoPage 1808 * if page free from file truncation, write NoRedoPage
1809 * filter; 1809 * filter;
1810 * 1810 *
1811 * update of block allocation map for the page itself: 1811 * update of block allocation map for the page itself:
1812 * if page free from deletion and truncation, LOG_UPDATEMAP 1812 * if page free from deletion and truncation, LOG_UPDATEMAP
1813 * log for the page itself is generated from processing 1813 * log for the page itself is generated from processing
1814 * its parent page xad entries; 1814 * its parent page xad entries;
1815 */ 1815 */
1816 /* if page free from file truncation, log LOG_NOREDOPAGE 1816 /* if page free from file truncation, log LOG_NOREDOPAGE
1817 * of the deleted page for logredo() to start NoRedoPage 1817 * of the deleted page for logredo() to start NoRedoPage
1818 * filter for the page; 1818 * filter for the page;
1819 */ 1819 */
1820 if (tblk->xflag & COMMIT_TRUNCATE) { 1820 if (tblk->xflag & COMMIT_TRUNCATE) {
1821 /* write NOREDOPAGE for the page */ 1821 /* write NOREDOPAGE for the page */
1822 lrd->type = cpu_to_le16(LOG_NOREDOPAGE); 1822 lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
1823 PXDaddress(page_pxd, mp->index); 1823 PXDaddress(page_pxd, mp->index);
1824 PXDlength(page_pxd, 1824 PXDlength(page_pxd,
1825 mp->logical_size >> tblk->sb-> 1825 mp->logical_size >> tblk->sb->
1826 s_blocksize_bits); 1826 s_blocksize_bits);
1827 lrd->backchain = 1827 lrd->backchain =
1828 cpu_to_le32(lmLog(log, tblk, lrd, NULL)); 1828 cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1829 1829
1830 if (tlck->type & tlckBTROOT) { 1830 if (tlck->type & tlckBTROOT) {
1831 /* Empty xtree must be logged */ 1831 /* Empty xtree must be logged */
1832 lrd->type = cpu_to_le16(LOG_REDOPAGE); 1832 lrd->type = cpu_to_le16(LOG_REDOPAGE);
1833 lrd->backchain = 1833 lrd->backchain =
1834 cpu_to_le32(lmLog(log, tblk, lrd, tlck)); 1834 cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1835 } 1835 }
1836 } 1836 }
1837 1837
1838 /* init LOG_UPDATEMAP of the freed extents 1838 /* init LOG_UPDATEMAP of the freed extents
1839 * XAD[XTENTRYSTART:hwm) from the deleted page itself 1839 * XAD[XTENTRYSTART:hwm) from the deleted page itself
1840 * for logredo() to update bmap; 1840 * for logredo() to update bmap;
1841 */ 1841 */
1842 lrd->type = cpu_to_le16(LOG_UPDATEMAP); 1842 lrd->type = cpu_to_le16(LOG_UPDATEMAP);
1843 lrd->log.updatemap.type = cpu_to_le16(LOG_FREEXADLIST); 1843 lrd->log.updatemap.type = cpu_to_le16(LOG_FREEXADLIST);
1844 xtlck = (struct xtlock *) & tlck->lock; 1844 xtlck = (struct xtlock *) & tlck->lock;
1845 hwm = xtlck->hwm.offset; 1845 hwm = xtlck->hwm.offset;
1846 lrd->log.updatemap.nxd = 1846 lrd->log.updatemap.nxd =
1847 cpu_to_le16(hwm - XTENTRYSTART + 1); 1847 cpu_to_le16(hwm - XTENTRYSTART + 1);
1848 /* reformat linelock for lmLog() */ 1848 /* reformat linelock for lmLog() */
1849 xtlck->header.offset = XTENTRYSTART; 1849 xtlck->header.offset = XTENTRYSTART;
1850 xtlck->header.length = hwm - XTENTRYSTART + 1; 1850 xtlck->header.length = hwm - XTENTRYSTART + 1;
1851 xtlck->index = 1; 1851 xtlck->index = 1;
1852 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); 1852 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1853 1853
1854 /* format a maplock for txUpdateMap() to update bmap 1854 /* format a maplock for txUpdateMap() to update bmap
1855 * to free extents of XAD[XTENTRYSTART:hwm) from the 1855 * to free extents of XAD[XTENTRYSTART:hwm) from the
1856 * deleted page itself; 1856 * deleted page itself;
1857 */ 1857 */
1858 tlck->flag |= tlckUPDATEMAP; 1858 tlck->flag |= tlckUPDATEMAP;
1859 xadlock->count = hwm - XTENTRYSTART + 1; 1859 xadlock->count = hwm - XTENTRYSTART + 1;
1860 if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) { 1860 if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) {
1861 int i; 1861 int i;
1862 pxd_t *pxd; 1862 pxd_t *pxd;
1863 /* 1863 /*
1864 * Lazy commit may allow xtree to be modified before 1864 * Lazy commit may allow xtree to be modified before
1865 * txUpdateMap runs. Copy xad into linelock to 1865 * txUpdateMap runs. Copy xad into linelock to
1866 * preserve correct data. 1866 * preserve correct data.
1867 * 1867 *
1868 * We can fit twice as many pxd's as xads in the lock 1868 * We can fit twice as many pxd's as xads in the lock
1869 */ 1869 */
1870 xadlock->flag = mlckFREEPXDLIST; 1870 xadlock->flag = mlckFREEPXDLIST;
1871 pxd = xadlock->xdlist = &xtlck->pxdlock; 1871 pxd = xadlock->xdlist = &xtlck->pxdlock;
1872 for (i = 0; i < xadlock->count; i++) { 1872 for (i = 0; i < xadlock->count; i++) {
1873 PXDaddress(pxd, 1873 PXDaddress(pxd,
1874 addressXAD(&p->xad[XTENTRYSTART + i])); 1874 addressXAD(&p->xad[XTENTRYSTART + i]));
1875 PXDlength(pxd, 1875 PXDlength(pxd,
1876 lengthXAD(&p->xad[XTENTRYSTART + i])); 1876 lengthXAD(&p->xad[XTENTRYSTART + i]));
1877 pxd++; 1877 pxd++;
1878 } 1878 }
1879 } else { 1879 } else {
1880 /* 1880 /*
1881 * xdlist will point into the inode's xtree; ensure 1881 * xdlist will point into the inode's xtree; ensure
1882 * that transaction is not committed lazily. 1882 * that transaction is not committed lazily.
1883 */ 1883 */
1884 xadlock->flag = mlckFREEXADLIST; 1884 xadlock->flag = mlckFREEXADLIST;
1885 xadlock->xdlist = &p->xad[XTENTRYSTART]; 1885 xadlock->xdlist = &p->xad[XTENTRYSTART];
1886 tblk->xflag &= ~COMMIT_LAZY; 1886 tblk->xflag &= ~COMMIT_LAZY;
1887 } 1887 }
1888 jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d lwm:2", 1888 jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d lwm:2",
1889 tlck->ip, mp, xadlock->count); 1889 tlck->ip, mp, xadlock->count);
1890 1890
1891 maplock->index = 1; 1891 maplock->index = 1;
1892 1892
1893 /* mark page as invalid */ 1893 /* mark page as invalid */
1894 if (((tblk->xflag & COMMIT_PWMAP) || S_ISDIR(ip->i_mode)) 1894 if (((tblk->xflag & COMMIT_PWMAP) || S_ISDIR(ip->i_mode))
1895 && !(tlck->type & tlckBTROOT)) 1895 && !(tlck->type & tlckBTROOT))
1896 tlck->flag |= tlckFREEPAGE; 1896 tlck->flag |= tlckFREEPAGE;
1897 /* 1897 /*
1898 else (tblk->xflag & COMMIT_PMAP) 1898 else (tblk->xflag & COMMIT_PMAP)
1899 ? release the page; 1899 ? release the page;
1900 */ 1900 */
1901 return; 1901 return;
1902 } 1902 }
1903 1903
1904 /* 1904 /*
1905 * page/entry truncation: file truncation (ref. xtTruncate()) 1905 * page/entry truncation: file truncation (ref. xtTruncate())
1906 * 1906 *
1907 * |----------+------+------+---------------| 1907 * |----------+------+------+---------------|
1908 * |          |      | 1908 * |          |      |
1909 * |          |     hwm - hwm before truncation 1909 * |          |     hwm - hwm before truncation
1910 * |         next - truncation point 1910 * |         next - truncation point
1911 * lwm - lwm before truncation 1911 * lwm - lwm before truncation
1912 * header ? 1912 * header ?
1913 */ 1913 */
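	/*
	 * worked example (illustrative, assuming XTENTRYSTART == 2): if
	 * the page held XAD[2..10] and truncation leaves next == 6 with
	 * lwm == 4, twm == 5, hwm == 10, then XAD[4:6) are logged as
	 * newly allocated, XAD[5] (== next - 1) frees only its truncated
	 * delta extent recorded in xtlck->pxdlock, and XAD[6..10] are
	 * logged and freed in full.
	 */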
1914 if (tlck->type & tlckTRUNCATE) { 1914 if (tlck->type & tlckTRUNCATE) {
1915 pxd_t pxd; /* truncated extent of xad */ 1915 pxd_t pxd; /* truncated extent of xad */
1916 int twm; 1916 int twm;
1917 1917
1918 /* 1918 /*
1919 * For truncation the entire linelock may be used, so it would 1919 * For truncation the entire linelock may be used, so it would
1920 * be difficult to store xad list in linelock itself. 1920 * be difficult to store xad list in linelock itself.
1921 * Therefore, we'll just force transaction to be committed 1921 * Therefore, we'll just force transaction to be committed
1922 * synchronously, so that xtree pages won't be changed before 1922 * synchronously, so that xtree pages won't be changed before
1923 * txUpdateMap runs. 1923 * txUpdateMap runs.
1924 */ 1924 */
1925 tblk->xflag &= ~COMMIT_LAZY; 1925 tblk->xflag &= ~COMMIT_LAZY;
1926 lwm = xtlck->lwm.offset; 1926 lwm = xtlck->lwm.offset;
1927 if (lwm == 0) 1927 if (lwm == 0)
1928 lwm = XTPAGEMAXSLOT; 1928 lwm = XTPAGEMAXSLOT;
1929 hwm = xtlck->hwm.offset; 1929 hwm = xtlck->hwm.offset;
1930 twm = xtlck->twm.offset; 1930 twm = xtlck->twm.offset;
1931 1931
1932 /* 1932 /*
1933 * write log records 1933 * write log records
1934 */ 1934 */
1935 /* log after-image for logredo(): 1935 /* log after-image for logredo():
1936 * 1936 *
1937 * logredo() will update bmap for alloc of new/extended 1937 * logredo() will update bmap for alloc of new/extended
1938 * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from 1938 * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from
1939 * after-image of XADlist; 1939 * after-image of XADlist;
1940 * logredo() resets (XAD_NEW|XAD_EXTEND) flag when 1940 * logredo() resets (XAD_NEW|XAD_EXTEND) flag when
1941 * applying the after-image to the meta-data page. 1941 * applying the after-image to the meta-data page.
1942 */ 1942 */
1943 lrd->type = cpu_to_le16(LOG_REDOPAGE); 1943 lrd->type = cpu_to_le16(LOG_REDOPAGE);
1944 PXDaddress(page_pxd, mp->index); 1944 PXDaddress(page_pxd, mp->index);
1945 PXDlength(page_pxd, 1945 PXDlength(page_pxd,
1946 mp->logical_size >> tblk->sb->s_blocksize_bits); 1946 mp->logical_size >> tblk->sb->s_blocksize_bits);
1947 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); 1947 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1948 1948
1949 /* 1949 /*
1950 * truncate entry XAD[twm == next - 1]: 1950 * truncate entry XAD[twm == next - 1]:
1951 */ 1951 */
1952 if (twm == next - 1) { 1952 if (twm == next - 1) {
1953 /* init LOG_UPDATEMAP for logredo() to update bmap for 1953 /* init LOG_UPDATEMAP for logredo() to update bmap for
1954 * free of truncated delta extent of the truncated 1954 * free of truncated delta extent of the truncated
1955 * entry XAD[next - 1]: 1955 * entry XAD[next - 1]:
1956 * (xtlck->pxdlock = truncated delta extent); 1956 * (xtlck->pxdlock = truncated delta extent);
1957 */ 1957 */
1958 pxdlock = (struct pxd_lock *) & xtlck->pxdlock; 1958 pxdlock = (struct pxd_lock *) & xtlck->pxdlock;
1959 /* assert(pxdlock->type & tlckTRUNCATE); */ 1959 /* assert(pxdlock->type & tlckTRUNCATE); */
1960 lrd->type = cpu_to_le16(LOG_UPDATEMAP); 1960 lrd->type = cpu_to_le16(LOG_UPDATEMAP);
1961 lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD); 1961 lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD);
1962 lrd->log.updatemap.nxd = cpu_to_le16(1); 1962 lrd->log.updatemap.nxd = cpu_to_le16(1);
1963 lrd->log.updatemap.pxd = pxdlock->pxd; 1963 lrd->log.updatemap.pxd = pxdlock->pxd;
1964 pxd = pxdlock->pxd; /* save to format maplock */ 1964 pxd = pxdlock->pxd; /* save to format maplock */
1965 lrd->backchain = 1965 lrd->backchain =
1966 cpu_to_le32(lmLog(log, tblk, lrd, NULL)); 1966 cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1967 } 1967 }
1968 1968
1969 /* 1969 /*
1970 * free entries XAD[next:hwm]: 1970 * free entries XAD[next:hwm]:
1971 */ 1971 */
1972 if (hwm >= next) { 1972 if (hwm >= next) {
1973 /* init LOG_UPDATEMAP of the freed extents 1973 /* init LOG_UPDATEMAP of the freed extents
1974 * XAD[next:hwm] from the deleted page itself 1974 * XAD[next:hwm] from the deleted page itself
1975 * for logredo() to update bmap; 1975 * for logredo() to update bmap;
1976 */ 1976 */
1977 lrd->type = cpu_to_le16(LOG_UPDATEMAP); 1977 lrd->type = cpu_to_le16(LOG_UPDATEMAP);
1978 lrd->log.updatemap.type = 1978 lrd->log.updatemap.type =
1979 cpu_to_le16(LOG_FREEXADLIST); 1979 cpu_to_le16(LOG_FREEXADLIST);
1980 xtlck = (struct xtlock *) & tlck->lock; 1980 xtlck = (struct xtlock *) & tlck->lock;
1981 hwm = xtlck->hwm.offset; 1981 hwm = xtlck->hwm.offset;
1982 lrd->log.updatemap.nxd = 1982 lrd->log.updatemap.nxd =
1983 cpu_to_le16(hwm - next + 1); 1983 cpu_to_le16(hwm - next + 1);
1984 /* reformat linelock for lmLog() */ 1984 /* reformat linelock for lmLog() */
1985 xtlck->header.offset = next; 1985 xtlck->header.offset = next;
1986 xtlck->header.length = hwm - next + 1; 1986 xtlck->header.length = hwm - next + 1;
1987 xtlck->index = 1; 1987 xtlck->index = 1;
1988 lrd->backchain = 1988 lrd->backchain =
1989 cpu_to_le32(lmLog(log, tblk, lrd, tlck)); 1989 cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1990 } 1990 }
1991 1991
1992 /* 1992 /*
1993 * format maplock(s) for txUpdateMap() to update bmap 1993 * format maplock(s) for txUpdateMap() to update bmap
1994 */ 1994 */
1995 maplock->index = 0; 1995 maplock->index = 0;
1996 1996
1997 /* 1997 /*
1998 * allocate entries XAD[lwm:next): 1998 * allocate entries XAD[lwm:next):
1999 */ 1999 */
2000 if (lwm < next) { 2000 if (lwm < next) {
2001 /* format a maplock for txUpdateMap() to update bPMAP 2001 /* format a maplock for txUpdateMap() to update bPMAP
2002 * for alloc of new/extended extents of XAD[lwm:next) 2002 * for alloc of new/extended extents of XAD[lwm:next)
2003 * from the page itself; 2003 * from the page itself;
2004 * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag. 2004 * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag.
2005 */ 2005 */
2006 tlck->flag |= tlckUPDATEMAP; 2006 tlck->flag |= tlckUPDATEMAP;
2007 xadlock->flag = mlckALLOCXADLIST; 2007 xadlock->flag = mlckALLOCXADLIST;
2008 xadlock->count = next - lwm; 2008 xadlock->count = next - lwm;
2009 xadlock->xdlist = &p->xad[lwm]; 2009 xadlock->xdlist = &p->xad[lwm];
2010 2010
2011 jfs_info("xtLog: alloc ip:0x%p mp:0x%p count:%d " 2011 jfs_info("xtLog: alloc ip:0x%p mp:0x%p count:%d "
2012 "lwm:%d next:%d", 2012 "lwm:%d next:%d",
2013 tlck->ip, mp, xadlock->count, lwm, next); 2013 tlck->ip, mp, xadlock->count, lwm, next);
2014 maplock->index++; 2014 maplock->index++;
2015 xadlock++; 2015 xadlock++;
2016 } 2016 }
2017 2017
2018 /* 2018 /*
2019 * truncate entry XAD[twm == next - 1]: 2019 * truncate entry XAD[twm == next - 1]:
2020 */ 2020 */
2021 if (twm == next - 1) { 2021 if (twm == next - 1) {
2022 struct pxd_lock *pxdlock; 2022 struct pxd_lock *pxdlock;
2023 2023
2024 /* format a maplock for txUpdateMap() to update bmap 2024 /* format a maplock for txUpdateMap() to update bmap
2025 * to free truncated delta extent of the truncated 2025 * to free truncated delta extent of the truncated
2026 * entry XAD[next - 1]; 2026 * entry XAD[next - 1];
2027 * (xtlck->pxdlock = truncated delta extent); 2027 * (xtlck->pxdlock = truncated delta extent);
2028 */ 2028 */
2029 tlck->flag |= tlckUPDATEMAP; 2029 tlck->flag |= tlckUPDATEMAP;
2030 pxdlock = (struct pxd_lock *) xadlock; 2030 pxdlock = (struct pxd_lock *) xadlock;
2031 pxdlock->flag = mlckFREEPXD; 2031 pxdlock->flag = mlckFREEPXD;
2032 pxdlock->count = 1; 2032 pxdlock->count = 1;
2033 pxdlock->pxd = pxd; 2033 pxdlock->pxd = pxd;
2034 2034
2035 jfs_info("xtLog: truncate ip:0x%p mp:0x%p count:%d " 2035 jfs_info("xtLog: truncate ip:0x%p mp:0x%p count:%d "
2036 "hwm:%d", ip, mp, pxdlock->count, hwm); 2036 "hwm:%d", ip, mp, pxdlock->count, hwm);
2037 maplock->index++; 2037 maplock->index++;
2038 xadlock++; 2038 xadlock++;
2039 } 2039 }
2040 2040
2041 /* 2041 /*
2042 * free entries XAD[next:hwm]: 2042 * free entries XAD[next:hwm]:
2043 */ 2043 */
2044 if (hwm >= next) { 2044 if (hwm >= next) {
2045 /* format a maplock for txUpdateMap() to update bmap 2045 /* format a maplock for txUpdateMap() to update bmap
2046 * to free extents of XAD[next:hwm] from the deleted 2046 * to free extents of XAD[next:hwm] from the deleted
2047 * page itself; 2047 * page itself;
2048 */ 2048 */
2049 tlck->flag |= tlckUPDATEMAP; 2049 tlck->flag |= tlckUPDATEMAP;
2050 xadlock->flag = mlckFREEXADLIST; 2050 xadlock->flag = mlckFREEXADLIST;
2051 xadlock->count = hwm - next + 1; 2051 xadlock->count = hwm - next + 1;
2052 xadlock->xdlist = &p->xad[next]; 2052 xadlock->xdlist = &p->xad[next];
2053 2053
2054 jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d " 2054 jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d "
2055 "next:%d hwm:%d", 2055 "next:%d hwm:%d",
2056 tlck->ip, mp, xadlock->count, next, hwm); 2056 tlck->ip, mp, xadlock->count, next, hwm);
2057 maplock->index++; 2057 maplock->index++;
2058 } 2058 }
2059 2059
2060 /* mark page as homeward bound */ 2060 /* mark page as homeward bound */
2061 tlck->flag |= tlckWRITEPAGE; 2061 tlck->flag |= tlckWRITEPAGE;
2062 } 2062 }
2063 return; 2063 return;
2064 } 2064 }
2065 2065
2066 /* 2066 /*
2067 * mapLog() 2067 * mapLog()
2068 * 2068 *
2069 * function: log from maplock of freed data extents; 2069 * function: log from maplock of freed data extents;
2070 */ 2070 */
2071 void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, 2071 void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
2072 struct tlock * tlck) 2072 struct tlock * tlck)
2073 { 2073 {
2074 struct pxd_lock *pxdlock; 2074 struct pxd_lock *pxdlock;
2075 int i, nlock; 2075 int i, nlock;
2076 pxd_t *pxd; 2076 pxd_t *pxd;
2077 2077
2078 /* 2078 /*
2079 * page relocation: free the source page extent 2079 * page relocation: free the source page extent
2080 * 2080 *
2081 * a maplock for txUpdateMap() for free of the page 2081 * a maplock for txUpdateMap() for free of the page
2082 * has been formatted at txLock() time saving the src 2082 * has been formatted at txLock() time saving the src
2083 * relocated page address; 2083 * relocated page address;
2084 */ 2084 */
2085 if (tlck->type & tlckRELOCATE) { 2085 if (tlck->type & tlckRELOCATE) {
2086 /* log LOG_NOREDOPAGE of the old relocated page 2086 /* log LOG_NOREDOPAGE of the old relocated page
2087 * for logredo() to start NoRedoPage filter; 2087 * for logredo() to start NoRedoPage filter;
2088 */ 2088 */
2089 lrd->type = cpu_to_le16(LOG_NOREDOPAGE); 2089 lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
2090 pxdlock = (struct pxd_lock *) & tlck->lock; 2090 pxdlock = (struct pxd_lock *) & tlck->lock;
2091 pxd = &lrd->log.redopage.pxd; 2091 pxd = &lrd->log.redopage.pxd;
2092 *pxd = pxdlock->pxd; 2092 *pxd = pxdlock->pxd;
2093 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); 2093 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
2094 2094
2095 /* (N.B. currently, logredo() does NOT update bmap 2095 /* (N.B. currently, logredo() does NOT update bmap
2096 * for free of the page itself for (LOG_XTREE|LOG_NOREDOPAGE); 2096 * for free of the page itself for (LOG_XTREE|LOG_NOREDOPAGE);
2097 * if page free from relocation, LOG_UPDATEMAP log is 2097 * if page free from relocation, LOG_UPDATEMAP log is
2098 * specifically generated now for logredo() 2098 * specifically generated now for logredo()
2099 * to update bmap for free of src relocated page; 2099 * to update bmap for free of src relocated page;
2100 * (new flag LOG_RELOCATE may be introduced which will 2100 * (new flag LOG_RELOCATE may be introduced which will
2101 * inform logredo() to start NORedoPage filter and also 2101 * inform logredo() to start NORedoPage filter and also
2102 * update block allocation map at the same time, thus 2102 * update block allocation map at the same time, thus
2103 * avoiding an extra log write); 2103 * avoiding an extra log write);
2104 */ 2104 */
2105 lrd->type = cpu_to_le16(LOG_UPDATEMAP); 2105 lrd->type = cpu_to_le16(LOG_UPDATEMAP);
2106 lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD); 2106 lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD);
2107 lrd->log.updatemap.nxd = cpu_to_le16(1); 2107 lrd->log.updatemap.nxd = cpu_to_le16(1);
2108 lrd->log.updatemap.pxd = pxdlock->pxd; 2108 lrd->log.updatemap.pxd = pxdlock->pxd;
2109 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); 2109 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
2110 2110
2111 /* a maplock for txUpdateMap() for free of the page 2111 /* a maplock for txUpdateMap() for free of the page
2112 * has been formatted at txLock() time; 2112 * has been formatted at txLock() time;
2113 */ 2113 */
2114 tlck->flag |= tlckUPDATEMAP; 2114 tlck->flag |= tlckUPDATEMAP;
2115 return; 2115 return;
2116 } 2116 }
2117 /* 2117 /*
2118 * 2118 *
2119 * Otherwise it's not a relocate request 2119 * Otherwise it's not a relocate request
2120 * 2120 *
2121 */ 2121 */
2122 else { 2122 else {
2123 /* log LOG_UPDATEMAP for logredo() to update bmap for 2123 /* log LOG_UPDATEMAP for logredo() to update bmap for
2124 * free of truncated/relocated delta extent of the data; 2124 * free of truncated/relocated delta extent of the data;
2125 * e.g.: external EA extent, relocated/truncated extent 2125 * e.g.: external EA extent, relocated/truncated extent
2126 * from xtTailgate(); 2126 * from xtTailgate();
2127 */ 2127 */
2128 lrd->type = cpu_to_le16(LOG_UPDATEMAP); 2128 lrd->type = cpu_to_le16(LOG_UPDATEMAP);
2129 pxdlock = (struct pxd_lock *) & tlck->lock; 2129 pxdlock = (struct pxd_lock *) & tlck->lock;
2130 nlock = pxdlock->index; 2130 nlock = pxdlock->index;
2131 for (i = 0; i < nlock; i++, pxdlock++) { 2131 for (i = 0; i < nlock; i++, pxdlock++) {
2132 if (pxdlock->flag & mlckALLOCPXD) 2132 if (pxdlock->flag & mlckALLOCPXD)
2133 lrd->log.updatemap.type = 2133 lrd->log.updatemap.type =
2134 cpu_to_le16(LOG_ALLOCPXD); 2134 cpu_to_le16(LOG_ALLOCPXD);
2135 else 2135 else
2136 lrd->log.updatemap.type = 2136 lrd->log.updatemap.type =
2137 cpu_to_le16(LOG_FREEPXD); 2137 cpu_to_le16(LOG_FREEPXD);
2138 lrd->log.updatemap.nxd = cpu_to_le16(1); 2138 lrd->log.updatemap.nxd = cpu_to_le16(1);
2139 lrd->log.updatemap.pxd = pxdlock->pxd; 2139 lrd->log.updatemap.pxd = pxdlock->pxd;
2140 lrd->backchain = 2140 lrd->backchain =
2141 cpu_to_le32(lmLog(log, tblk, lrd, NULL)); 2141 cpu_to_le32(lmLog(log, tblk, lrd, NULL));
2142 jfs_info("mapLog: xaddr:0x%lx xlen:0x%x", 2142 jfs_info("mapLog: xaddr:0x%lx xlen:0x%x",
2143 (ulong) addressPXD(&pxdlock->pxd), 2143 (ulong) addressPXD(&pxdlock->pxd),
2144 lengthPXD(&pxdlock->pxd)); 2144 lengthPXD(&pxdlock->pxd));
2145 } 2145 }
2146 2146
2147 /* update bmap */ 2147 /* update bmap */
2148 tlck->flag |= tlckUPDATEMAP; 2148 tlck->flag |= tlckUPDATEMAP;
2149 } 2149 }
2150 } 2150 }
2151 2151
2152 /* 2152 /*
2153 * txEA() 2153 * txEA()
2154 * 2154 *
2155 * function: acquire maplock for EA/ACL extents or 2155 * function: acquire maplock for EA/ACL extents or
2156 * set COMMIT_Inlineea flag; 2156 * set COMMIT_Inlineea flag;
2157 */ 2157 */
2158 void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea) 2158 void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea)
2159 { 2159 {
2160 struct tlock *tlck = NULL; 2160 struct tlock *tlck = NULL;
2161 struct pxd_lock *maplock = NULL, *pxdlock = NULL; 2161 struct pxd_lock *maplock = NULL, *pxdlock = NULL;
2162 2162
2163 /* 2163 /*
2164 * format maplock for alloc of new EA extent 2164 * format maplock for alloc of new EA extent
2165 */ 2165 */
2166 if (newea) { 2166 if (newea) {
2167 /* Since the newea could be a completely zeroed entry we need to 2167 /* Since the newea could be a completely zeroed entry we need to
2168 * check for the two flags which indicate we should actually 2168 * check for the two flags which indicate we should actually
2169 * commit new EA data 2169 * commit new EA data
2170 */ 2170 */
2171 if (newea->flag & DXD_EXTENT) { 2171 if (newea->flag & DXD_EXTENT) {
2172 tlck = txMaplock(tid, ip, tlckMAP); 2172 tlck = txMaplock(tid, ip, tlckMAP);
2173 maplock = (struct pxd_lock *) & tlck->lock; 2173 maplock = (struct pxd_lock *) & tlck->lock;
2174 pxdlock = (struct pxd_lock *) maplock; 2174 pxdlock = (struct pxd_lock *) maplock;
2175 pxdlock->flag = mlckALLOCPXD; 2175 pxdlock->flag = mlckALLOCPXD;
2176 PXDaddress(&pxdlock->pxd, addressDXD(newea)); 2176 PXDaddress(&pxdlock->pxd, addressDXD(newea));
2177 PXDlength(&pxdlock->pxd, lengthDXD(newea)); 2177 PXDlength(&pxdlock->pxd, lengthDXD(newea));
2178 pxdlock++; 2178 pxdlock++;
2179 maplock->index = 1; 2179 maplock->index = 1;
2180 } else if (newea->flag & DXD_INLINE) { 2180 } else if (newea->flag & DXD_INLINE) {
2181 tlck = NULL; 2181 tlck = NULL;
2182 2182
2183 set_cflag(COMMIT_Inlineea, ip); 2183 set_cflag(COMMIT_Inlineea, ip);
2184 } 2184 }
2185 } 2185 }
2186 2186
2187 /* 2187 /*
2188 * format maplock for free of old EA extent 2188 * format maplock for free of old EA extent
2189 */ 2189 */
2190 if (!test_cflag(COMMIT_Nolink, ip) && oldea->flag & DXD_EXTENT) { 2190 if (!test_cflag(COMMIT_Nolink, ip) && oldea->flag & DXD_EXTENT) {
2191 if (tlck == NULL) { 2191 if (tlck == NULL) {
2192 tlck = txMaplock(tid, ip, tlckMAP); 2192 tlck = txMaplock(tid, ip, tlckMAP);
2193 maplock = (struct pxd_lock *) & tlck->lock; 2193 maplock = (struct pxd_lock *) & tlck->lock;
2194 pxdlock = (struct pxd_lock *) maplock; 2194 pxdlock = (struct pxd_lock *) maplock;
2195 maplock->index = 0; 2195 maplock->index = 0;
2196 } 2196 }
2197 pxdlock->flag = mlckFREEPXD; 2197 pxdlock->flag = mlckFREEPXD;
2198 PXDaddress(&pxdlock->pxd, addressDXD(oldea)); 2198 PXDaddress(&pxdlock->pxd, addressDXD(oldea));
2199 PXDlength(&pxdlock->pxd, lengthDXD(oldea)); 2199 PXDlength(&pxdlock->pxd, lengthDXD(oldea));
2200 maplock->index++; 2200 maplock->index++;
2201 } 2201 }
2202 } 2202 }
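A minimal usage sketch of txEA() may help here: the extended-attribute paths hand it descriptors of the old and new EA areas, and txEA() converts them into maplocks (or the COMMIT_Inlineea flag) so that txUpdateMap() later fixes both allocation maps at commit. write_ea_data() below is a hypothetical stand-in for the real EA writer; only txEA() and JFS_IP() are taken from the source.

/* Hypothetical caller of txEA(); illustration only. */
static int update_ea_descriptor(tid_t tid, struct inode *ip)
{
	dxd_t oldea = JFS_IP(ip)->ea;	/* descriptor of the current EA area */
	dxd_t newea;			/* filled in by the writer */
	int rc;

	rc = write_ea_data(ip, &newea);	/* hypothetical helper */
	if (rc)
		return rc;

	/* record alloc of the new extent and free of the old one */
	txEA(tid, ip, &oldea, &newea);
	JFS_IP(ip)->ea = newea;
	return 0;
}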
2203 2203
2204 /* 2204 /*
2205 * txForce() 2205 * txForce()
2206 * 2206 *
2207 * function: synchronously write pages locked by transaction 2207 * function: synchronously write pages locked by transaction
2208 * after txLog() but before txUpdateMap(); 2208 * after txLog() but before txUpdateMap();
2209 */ 2209 */
2210 void txForce(struct tblock * tblk) 2210 void txForce(struct tblock * tblk)
2211 { 2211 {
2212 struct tlock *tlck; 2212 struct tlock *tlck;
2213 lid_t lid, next; 2213 lid_t lid, next;
2214 struct metapage *mp; 2214 struct metapage *mp;
2215 2215
2216 /* 2216 /*
2217 * reverse the order of transaction tlocks in 2217 * reverse the order of transaction tlocks in
2218 * careful update order of address index pages 2218 * careful update order of address index pages
2219 * (right to left, bottom up) 2219 * (right to left, bottom up)
2220 */ 2220 */
2221 tlck = lid_to_tlock(tblk->next); 2221 tlck = lid_to_tlock(tblk->next);
2222 lid = tlck->next; 2222 lid = tlck->next;
2223 tlck->next = 0; 2223 tlck->next = 0;
2224 while (lid) { 2224 while (lid) {
2225 tlck = lid_to_tlock(lid); 2225 tlck = lid_to_tlock(lid);
2226 next = tlck->next; 2226 next = tlck->next;
2227 tlck->next = tblk->next; 2227 tlck->next = tblk->next;
2228 tblk->next = lid; 2228 tblk->next = lid;
2229 lid = next; 2229 lid = next;
2230 } 2230 }
2231 2231
2232 /* 2232 /*
2233 * synchronously write the page, and 2233 * synchronously write the page, and
2234 * hold the page for txUpdateMap(); 2234 * hold the page for txUpdateMap();
2235 */ 2235 */
2236 for (lid = tblk->next; lid; lid = next) { 2236 for (lid = tblk->next; lid; lid = next) {
2237 tlck = lid_to_tlock(lid); 2237 tlck = lid_to_tlock(lid);
2238 next = tlck->next; 2238 next = tlck->next;
2239 2239
2240 if ((mp = tlck->mp) != NULL && 2240 if ((mp = tlck->mp) != NULL &&
2241 (tlck->type & tlckBTROOT) == 0) { 2241 (tlck->type & tlckBTROOT) == 0) {
2242 assert(mp->xflag & COMMIT_PAGE); 2242 assert(mp->xflag & COMMIT_PAGE);
2243 2243
2244 if (tlck->flag & tlckWRITEPAGE) { 2244 if (tlck->flag & tlckWRITEPAGE) {
2245 tlck->flag &= ~tlckWRITEPAGE; 2245 tlck->flag &= ~tlckWRITEPAGE;
2246 2246
2247 /* do not release page to freelist */ 2247 /* do not release page to freelist */
2248 force_metapage(mp); 2248 force_metapage(mp);
2249 #if 0 2249 #if 0
2250 /* 2250 /*
2251 * The "right" thing to do here is to 2251 * The "right" thing to do here is to
2252 * synchronously write the metadata. 2252 * synchronously write the metadata.
2253 * With the current implementation this 2253 * With the current implementation this
2254 * is hard since write_metapage requires 2254 * is hard since write_metapage requires
2255 * us to kunmap & remap the page. If we 2255 * us to kunmap & remap the page. If we
2256 * have tlocks pointing into the metadata 2256 * have tlocks pointing into the metadata
2257 * pages, we don't want to do this. I think 2257 * pages, we don't want to do this. I think
2258 * we can get by with synchronously writing 2258 * we can get by with synchronously writing
2259 * the pages when they are released. 2259 * the pages when they are released.
2260 */ 2260 */
2261 assert(mp->nohomeok); 2261 assert(mp->nohomeok);
2262 set_bit(META_dirty, &mp->flag); 2262 set_bit(META_dirty, &mp->flag);
2263 set_bit(META_sync, &mp->flag); 2263 set_bit(META_sync, &mp->flag);
2264 #endif 2264 #endif
2265 } 2265 }
2266 } 2266 }
2267 } 2267 }
2268 } 2268 }
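The loop at the top of txForce() is an in-place reversal of a singly linked list whose links are lid_t indices into the tlock table rather than pointers. The same idea in a minimal, self-contained form (illustration only; next[] stands in for the tlock ->next fields, 0 terminates the list):

/* sketch of the index-linked list reversal performed by txForce() */
static unsigned short reverse_index_list(unsigned short head,
					 unsigned short *next)
{
	unsigned short prev = 0, tmp;

	while (head) {
		tmp = next[head];	/* remember successor */
		next[head] = prev;	/* point node back at reversed part */
		prev = head;
		head = tmp;
	}
	return prev;			/* old tail becomes the new head */
}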
2269 2269
2270 /* 2270 /*
2271 * txUpdateMap() 2271 * txUpdateMap()
2272 * 2272 *
2273 * function: update persistent allocation map (and working map 2273 * function: update persistent allocation map (and working map
2274 * if appropriate); 2274 * if appropriate);
2275 * 2275 *
2276 * parameter: 2276 * parameter:
2277 */ 2277 */
2278 static void txUpdateMap(struct tblock * tblk) 2278 static void txUpdateMap(struct tblock * tblk)
2279 { 2279 {
2280 struct inode *ip; 2280 struct inode *ip;
2281 struct inode *ipimap; 2281 struct inode *ipimap;
2282 lid_t lid; 2282 lid_t lid;
2283 struct tlock *tlck; 2283 struct tlock *tlck;
2284 struct maplock *maplock; 2284 struct maplock *maplock;
2285 struct pxd_lock pxdlock; 2285 struct pxd_lock pxdlock;
2286 int maptype; 2286 int maptype;
2287 int k, nlock; 2287 int k, nlock;
2288 struct metapage *mp = NULL; 2288 struct metapage *mp = NULL;
2289 2289
2290 ipimap = JFS_SBI(tblk->sb)->ipimap; 2290 ipimap = JFS_SBI(tblk->sb)->ipimap;
2291 2291
2292 maptype = (tblk->xflag & COMMIT_PMAP) ? COMMIT_PMAP : COMMIT_PWMAP; 2292 maptype = (tblk->xflag & COMMIT_PMAP) ? COMMIT_PMAP : COMMIT_PWMAP;
2293 2293
2294 2294
2295 /* 2295 /*
2296 * update block allocation map 2296 * update block allocation map
2297 * 2297 *
2298 * update allocation state in pmap (and wmap) and 2298 * update allocation state in pmap (and wmap) and
2299 * update lsn of the pmap page; 2299 * update lsn of the pmap page;
2300 */ 2300 */
2301 /* 2301 /*
2302 * scan each tlock/page of transaction for block allocation/free: 2302 * scan each tlock/page of transaction for block allocation/free:
2303 * 2303 *
2304 * for each tlock/page of transaction, update map. 2304 * for each tlock/page of transaction, update map.
2305 * ? are there tlocks for pmap and pwmap at the same time ? 2305 * ? are there tlocks for pmap and pwmap at the same time ?
2306 */ 2306 */
2307 for (lid = tblk->next; lid; lid = tlck->next) { 2307 for (lid = tblk->next; lid; lid = tlck->next) {
2308 tlck = lid_to_tlock(lid); 2308 tlck = lid_to_tlock(lid);
2309 2309
2310 if ((tlck->flag & tlckUPDATEMAP) == 0) 2310 if ((tlck->flag & tlckUPDATEMAP) == 0)
2311 continue; 2311 continue;
2312 2312
2313 if (tlck->flag & tlckFREEPAGE) { 2313 if (tlck->flag & tlckFREEPAGE) {
2314 /* 2314 /*
2315 * Another thread may attempt to reuse freed space 2315 * Another thread may attempt to reuse freed space
2316 * immediately, so we want to get rid of the metapage 2316 * immediately, so we want to get rid of the metapage
2317 * before anyone else has a chance to get it. 2317 * before anyone else has a chance to get it.
2318 * Lock metapage, update maps, then invalidate 2318 * Lock metapage, update maps, then invalidate
2319 * the metapage. 2319 * the metapage.
2320 */ 2320 */
2321 mp = tlck->mp; 2321 mp = tlck->mp;
2322 ASSERT(mp->xflag & COMMIT_PAGE); 2322 ASSERT(mp->xflag & COMMIT_PAGE);
2323 grab_metapage(mp); 2323 grab_metapage(mp);
2324 } 2324 }
2325 2325
2326 /* 2326 /*
2327 * extent list: 2327 * extent list:
2328 * . in-line PXD list: 2328 * . in-line PXD list:
2329 * . out-of-line XAD list: 2329 * . out-of-line XAD list:
2330 */ 2330 */
2331 maplock = (struct maplock *) & tlck->lock; 2331 maplock = (struct maplock *) & tlck->lock;
2332 nlock = maplock->index; 2332 nlock = maplock->index;
2333 2333
2334 for (k = 0; k < nlock; k++, maplock++) { 2334 for (k = 0; k < nlock; k++, maplock++) {
2335 /* 2335 /*
2336 * allocate blocks in persistent map: 2336 * allocate blocks in persistent map:
2337 * 2337 *
2338 * blocks have been allocated from wmap at alloc time; 2338 * blocks have been allocated from wmap at alloc time;
2339 */ 2339 */
2340 if (maplock->flag & mlckALLOC) { 2340 if (maplock->flag & mlckALLOC) {
2341 txAllocPMap(ipimap, maplock, tblk); 2341 txAllocPMap(ipimap, maplock, tblk);
2342 } 2342 }
2343 /* 2343 /*
2344 * free blocks in persistent and working map: 2344 * free blocks in persistent and working map:
2345 * blocks will be freed in pmap and then in wmap; 2345 * blocks will be freed in pmap and then in wmap;
2346 * 2346 *
2347 * ? tblock specifies the PMAP/PWMAP based upon 2347 * ? tblock specifies the PMAP/PWMAP based upon
2348 * transaction 2348 * transaction
2349 * 2349 *
2350 * free blocks in persistent map: 2350 * free blocks in persistent map:
2351 * blocks will be freed from wmap at last reference 2351 * blocks will be freed from wmap at last reference
2352 * release of the object for regular files; 2352 * release of the object for regular files;
2353 * 2353 *
2354 * Always free blocks from both persistent & working 2354 * Always free blocks from both persistent & working
2355 * maps for directories 2355 * maps for directories
2356 */ 2356 */
2357 else { /* (maplock->flag & mlckFREE) */ 2357 else { /* (maplock->flag & mlckFREE) */
2358 2358
2359 if (S_ISDIR(tlck->ip->i_mode)) 2359 if (S_ISDIR(tlck->ip->i_mode))
2360 txFreeMap(ipimap, maplock, 2360 txFreeMap(ipimap, maplock,
2361 tblk, COMMIT_PWMAP); 2361 tblk, COMMIT_PWMAP);
2362 else 2362 else
2363 txFreeMap(ipimap, maplock, 2363 txFreeMap(ipimap, maplock,
2364 tblk, maptype); 2364 tblk, maptype);
2365 } 2365 }
2366 } 2366 }
2367 if (tlck->flag & tlckFREEPAGE) { 2367 if (tlck->flag & tlckFREEPAGE) {
2368 if (!(tblk->flag & tblkGC_LAZY)) { 2368 if (!(tblk->flag & tblkGC_LAZY)) {
2369 /* This is equivalent to txRelease */ 2369 /* This is equivalent to txRelease */
2370 ASSERT(mp->lid == lid); 2370 ASSERT(mp->lid == lid);
2371 tlck->mp->lid = 0; 2371 tlck->mp->lid = 0;
2372 } 2372 }
2373 assert(mp->nohomeok == 1); 2373 assert(mp->nohomeok == 1);
2374 metapage_homeok(mp); 2374 metapage_homeok(mp);
2375 discard_metapage(mp); 2375 discard_metapage(mp);
2376 tlck->mp = NULL; 2376 tlck->mp = NULL;
2377 } 2377 }
2378 } 2378 }
2379 /* 2379 /*
2380 * update inode allocation map 2380 * update inode allocation map
2381 * 2381 *
2382 * update allocation state in pmap and 2382 * update allocation state in pmap and
2383 * update lsn of the pmap page; 2383 * update lsn of the pmap page;
2384 * update in-memory inode flag/state 2384 * update in-memory inode flag/state
2385 * 2385 *
2386 * unlock mapper/write lock 2386 * unlock mapper/write lock
2387 */ 2387 */
2388 if (tblk->xflag & COMMIT_CREATE) { 2388 if (tblk->xflag & COMMIT_CREATE) {
2389 diUpdatePMap(ipimap, tblk->ino, FALSE, tblk); 2389 diUpdatePMap(ipimap, tblk->ino, FALSE, tblk);
2390 ipimap->i_state |= I_DIRTY; 2390 ipimap->i_state |= I_DIRTY;
2391 /* update persistent block allocation map 2391 /* update persistent block allocation map
2392 * for the allocation of inode extent; 2392 * for the allocation of inode extent;
2393 */ 2393 */
2394 pxdlock.flag = mlckALLOCPXD; 2394 pxdlock.flag = mlckALLOCPXD;
2395 pxdlock.pxd = tblk->u.ixpxd; 2395 pxdlock.pxd = tblk->u.ixpxd;
2396 pxdlock.index = 1; 2396 pxdlock.index = 1;
2397 txAllocPMap(ipimap, (struct maplock *) & pxdlock, tblk); 2397 txAllocPMap(ipimap, (struct maplock *) & pxdlock, tblk);
2398 } else if (tblk->xflag & COMMIT_DELETE) { 2398 } else if (tblk->xflag & COMMIT_DELETE) {
2399 ip = tblk->u.ip; 2399 ip = tblk->u.ip;
2400 diUpdatePMap(ipimap, ip->i_ino, TRUE, tblk); 2400 diUpdatePMap(ipimap, ip->i_ino, TRUE, tblk);
2401 ipimap->i_state |= I_DIRTY; 2401 ipimap->i_state |= I_DIRTY;
2402 iput(ip); 2402 iput(ip);
2403 } 2403 }
2404 } 2404 }
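The division of labor that txUpdateMap() enforces between the two block allocation maps is: the working map (wmap) reflects allocations immediately, the persistent map (pmap) changes only at commit, and frees hit the pmap at commit while the wmap is freed either at the same time (COMMIT_PWMAP, e.g. for directories) or at last reference release. A toy model of that protocol, assuming single-word bitmaps (the real maps are the dmap structures behind dbUpdatePMap() and dbFree()):

/* toy model of the pmap/wmap protocol; illustration only */
struct toy_maps {
	unsigned long wmap;	/* working map: current in-memory state */
	unsigned long pmap;	/* persistent map: state safe on disk */
};

static void toy_alloc(struct toy_maps *m, int blk)
{
	m->wmap |= 1UL << blk;		/* claimed at allocation time */
}

static void toy_commit_alloc(struct toy_maps *m, int blk)
{
	m->pmap |= 1UL << blk;		/* made durable at commit (txAllocPMap) */
}

static void toy_commit_free(struct toy_maps *m, int blk, int pwmap)
{
	m->pmap &= ~(1UL << blk);	/* freed in pmap at commit (txFreeMap) */
	if (pwmap)			/* COMMIT_PWMAP: free wmap too */
		m->wmap &= ~(1UL << blk);
	/* otherwise the wmap is freed at last release of the object */
}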
2405 2405
2406 /* 2406 /*
2407 * txAllocPMap() 2407 * txAllocPMap()
2408 * 2408 *
2409 * function: allocate from persistent map; 2409 * function: allocate from persistent map;
2410 * 2410 *
2411 * parameter: 2411 * parameter:
2412 * ipbmap - 2412 * ipbmap -
2413 * maplock - 2413 * maplock -
2414 * xad list: 2414 * xad list:
2415 * pxd: 2415 * pxd:
2416 * 2416 *
2417 * maptype - 2417 * maptype -
2418 * allocate from persistent map; 2418 * allocate from persistent map;
2419 * free from persistent map; 2419 * free from persistent map;
2420 * (e.g., tmp file - free from working map at release 2420 * (e.g., tmp file - free from working map at release
2421 * of last reference); 2421 * of last reference);
2422 * free from persistent and working map; 2422 * free from persistent and working map;
2423 * 2423 *
2424 * lsn - log sequence number; 2424 * lsn - log sequence number;
2425 */ 2425 */
2426 static void txAllocPMap(struct inode *ip, struct maplock * maplock, 2426 static void txAllocPMap(struct inode *ip, struct maplock * maplock,
2427 struct tblock * tblk) 2427 struct tblock * tblk)
2428 { 2428 {
2429 struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; 2429 struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
2430 struct xdlistlock *xadlistlock; 2430 struct xdlistlock *xadlistlock;
2431 xad_t *xad; 2431 xad_t *xad;
2432 s64 xaddr; 2432 s64 xaddr;
2433 int xlen; 2433 int xlen;
2434 struct pxd_lock *pxdlock; 2434 struct pxd_lock *pxdlock;
2435 struct xdlistlock *pxdlistlock; 2435 struct xdlistlock *pxdlistlock;
2436 pxd_t *pxd; 2436 pxd_t *pxd;
2437 int n; 2437 int n;
2438 2438
2439 /* 2439 /*
2440 * allocate from persistent map; 2440 * allocate from persistent map;
2441 */ 2441 */
2442 if (maplock->flag & mlckALLOCXADLIST) { 2442 if (maplock->flag & mlckALLOCXADLIST) {
2443 xadlistlock = (struct xdlistlock *) maplock; 2443 xadlistlock = (struct xdlistlock *) maplock;
2444 xad = xadlistlock->xdlist; 2444 xad = xadlistlock->xdlist;
2445 for (n = 0; n < xadlistlock->count; n++, xad++) { 2445 for (n = 0; n < xadlistlock->count; n++, xad++) {
2446 if (xad->flag & (XAD_NEW | XAD_EXTENDED)) { 2446 if (xad->flag & (XAD_NEW | XAD_EXTENDED)) {
2447 xaddr = addressXAD(xad); 2447 xaddr = addressXAD(xad);
2448 xlen = lengthXAD(xad); 2448 xlen = lengthXAD(xad);
2449 dbUpdatePMap(ipbmap, FALSE, xaddr, 2449 dbUpdatePMap(ipbmap, FALSE, xaddr,
2450 (s64) xlen, tblk); 2450 (s64) xlen, tblk);
2451 xad->flag &= ~(XAD_NEW | XAD_EXTENDED); 2451 xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
2452 jfs_info("allocPMap: xaddr:0x%lx xlen:%d", 2452 jfs_info("allocPMap: xaddr:0x%lx xlen:%d",
2453 (ulong) xaddr, xlen); 2453 (ulong) xaddr, xlen);
2454 } 2454 }
2455 } 2455 }
2456 } else if (maplock->flag & mlckALLOCPXD) { 2456 } else if (maplock->flag & mlckALLOCPXD) {
2457 pxdlock = (struct pxd_lock *) maplock; 2457 pxdlock = (struct pxd_lock *) maplock;
2458 xaddr = addressPXD(&pxdlock->pxd); 2458 xaddr = addressPXD(&pxdlock->pxd);
2459 xlen = lengthPXD(&pxdlock->pxd); 2459 xlen = lengthPXD(&pxdlock->pxd);
2460 dbUpdatePMap(ipbmap, FALSE, xaddr, (s64) xlen, tblk); 2460 dbUpdatePMap(ipbmap, FALSE, xaddr, (s64) xlen, tblk);
2461 jfs_info("allocPMap: xaddr:0x%lx xlen:%d", (ulong) xaddr, xlen); 2461 jfs_info("allocPMap: xaddr:0x%lx xlen:%d", (ulong) xaddr, xlen);
2462 } else { /* (maplock->flag & mlckALLOCPXDLIST) */ 2462 } else { /* (maplock->flag & mlckALLOCPXDLIST) */
2463 2463
2464 pxdlistlock = (struct xdlistlock *) maplock; 2464 pxdlistlock = (struct xdlistlock *) maplock;
2465 pxd = pxdlistlock->xdlist; 2465 pxd = pxdlistlock->xdlist;
2466 for (n = 0; n < pxdlistlock->count; n++, pxd++) { 2466 for (n = 0; n < pxdlistlock->count; n++, pxd++) {
2467 xaddr = addressPXD(pxd); 2467 xaddr = addressPXD(pxd);
2468 xlen = lengthPXD(pxd); 2468 xlen = lengthPXD(pxd);
2469 dbUpdatePMap(ipbmap, FALSE, xaddr, (s64) xlen, 2469 dbUpdatePMap(ipbmap, FALSE, xaddr, (s64) xlen,
2470 tblk); 2470 tblk);
2471 jfs_info("allocPMap: xaddr:0x%lx xlen:%d", 2471 jfs_info("allocPMap: xaddr:0x%lx xlen:%d",
2472 (ulong) xaddr, xlen); 2472 (ulong) xaddr, xlen);
2473 } 2473 }
2474 } 2474 }
2475 } 2475 }
2476 2476
2477 /* 2477 /*
2478 * txFreeMap() 2478 * txFreeMap()
2479 * 2479 *
2480 * function: free from persistent and/or working map; 2480 * function: free from persistent and/or working map;
2481 * 2481 *
2482 * todo: optimization 2482 * todo: optimization
2483 */ 2483 */
2484 void txFreeMap(struct inode *ip, 2484 void txFreeMap(struct inode *ip,
2485 struct maplock * maplock, struct tblock * tblk, int maptype) 2485 struct maplock * maplock, struct tblock * tblk, int maptype)
2486 { 2486 {
2487 struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; 2487 struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
2488 struct xdlistlock *xadlistlock; 2488 struct xdlistlock *xadlistlock;
2489 xad_t *xad; 2489 xad_t *xad;
2490 s64 xaddr; 2490 s64 xaddr;
2491 int xlen; 2491 int xlen;
2492 struct pxd_lock *pxdlock; 2492 struct pxd_lock *pxdlock;
2493 struct xdlistlock *pxdlistlock; 2493 struct xdlistlock *pxdlistlock;
2494 pxd_t *pxd; 2494 pxd_t *pxd;
2495 int n; 2495 int n;
2496 2496
2497 jfs_info("txFreeMap: tblk:0x%p maplock:0x%p maptype:0x%x", 2497 jfs_info("txFreeMap: tblk:0x%p maplock:0x%p maptype:0x%x",
2498 tblk, maplock, maptype); 2498 tblk, maplock, maptype);
2499 2499
2500 /* 2500 /*
2501 * free from persistent map; 2501 * free from persistent map;
2502 */ 2502 */
2503 if (maptype == COMMIT_PMAP || maptype == COMMIT_PWMAP) { 2503 if (maptype == COMMIT_PMAP || maptype == COMMIT_PWMAP) {
2504 if (maplock->flag & mlckFREEXADLIST) { 2504 if (maplock->flag & mlckFREEXADLIST) {
2505 xadlistlock = (struct xdlistlock *) maplock; 2505 xadlistlock = (struct xdlistlock *) maplock;
2506 xad = xadlistlock->xdlist; 2506 xad = xadlistlock->xdlist;
2507 for (n = 0; n < xadlistlock->count; n++, xad++) { 2507 for (n = 0; n < xadlistlock->count; n++, xad++) {
2508 if (!(xad->flag & XAD_NEW)) { 2508 if (!(xad->flag & XAD_NEW)) {
2509 xaddr = addressXAD(xad); 2509 xaddr = addressXAD(xad);
2510 xlen = lengthXAD(xad); 2510 xlen = lengthXAD(xad);
2511 dbUpdatePMap(ipbmap, TRUE, xaddr, 2511 dbUpdatePMap(ipbmap, TRUE, xaddr,
2512 (s64) xlen, tblk); 2512 (s64) xlen, tblk);
2513 jfs_info("freePMap: xaddr:0x%lx " 2513 jfs_info("freePMap: xaddr:0x%lx "
2514 "xlen:%d", 2514 "xlen:%d",
2515 (ulong) xaddr, xlen); 2515 (ulong) xaddr, xlen);
2516 } 2516 }
2517 } 2517 }
2518 } else if (maplock->flag & mlckFREEPXD) { 2518 } else if (maplock->flag & mlckFREEPXD) {
2519 pxdlock = (struct pxd_lock *) maplock; 2519 pxdlock = (struct pxd_lock *) maplock;
2520 xaddr = addressPXD(&pxdlock->pxd); 2520 xaddr = addressPXD(&pxdlock->pxd);
2521 xlen = lengthPXD(&pxdlock->pxd); 2521 xlen = lengthPXD(&pxdlock->pxd);
2522 dbUpdatePMap(ipbmap, TRUE, xaddr, (s64) xlen, 2522 dbUpdatePMap(ipbmap, TRUE, xaddr, (s64) xlen,
2523 tblk); 2523 tblk);
2524 jfs_info("freePMap: xaddr:0x%lx xlen:%d", 2524 jfs_info("freePMap: xaddr:0x%lx xlen:%d",
2525 (ulong) xaddr, xlen); 2525 (ulong) xaddr, xlen);
2526 } else { /* (maplock->flag & mlckFREEPXDLIST) */ 2526 } else { /* (maplock->flag & mlckFREEPXDLIST) */
2527 2527
2528 pxdlistlock = (struct xdlistlock *) maplock; 2528 pxdlistlock = (struct xdlistlock *) maplock;
2529 pxd = pxdlistlock->xdlist; 2529 pxd = pxdlistlock->xdlist;
2530 for (n = 0; n < pxdlistlock->count; n++, pxd++) { 2530 for (n = 0; n < pxdlistlock->count; n++, pxd++) {
2531 xaddr = addressPXD(pxd); 2531 xaddr = addressPXD(pxd);
2532 xlen = lengthPXD(pxd); 2532 xlen = lengthPXD(pxd);
2533 dbUpdatePMap(ipbmap, TRUE, xaddr, 2533 dbUpdatePMap(ipbmap, TRUE, xaddr,
2534 (s64) xlen, tblk); 2534 (s64) xlen, tblk);
2535 jfs_info("freePMap: xaddr:0x%lx xlen:%d", 2535 jfs_info("freePMap: xaddr:0x%lx xlen:%d",
2536 (ulong) xaddr, xlen); 2536 (ulong) xaddr, xlen);
2537 } 2537 }
2538 } 2538 }
2539 } 2539 }
2540 2540
2541 /* 2541 /*
2542 * free from working map; 2542 * free from working map;
2543 */ 2543 */
2544 if (maptype == COMMIT_PWMAP || maptype == COMMIT_WMAP) { 2544 if (maptype == COMMIT_PWMAP || maptype == COMMIT_WMAP) {
2545 if (maplock->flag & mlckFREEXADLIST) { 2545 if (maplock->flag & mlckFREEXADLIST) {
2546 xadlistlock = (struct xdlistlock *) maplock; 2546 xadlistlock = (struct xdlistlock *) maplock;
2547 xad = xadlistlock->xdlist; 2547 xad = xadlistlock->xdlist;
2548 for (n = 0; n < xadlistlock->count; n++, xad++) { 2548 for (n = 0; n < xadlistlock->count; n++, xad++) {
2549 xaddr = addressXAD(xad); 2549 xaddr = addressXAD(xad);
2550 xlen = lengthXAD(xad); 2550 xlen = lengthXAD(xad);
2551 dbFree(ip, xaddr, (s64) xlen); 2551 dbFree(ip, xaddr, (s64) xlen);
2552 xad->flag = 0; 2552 xad->flag = 0;
2553 jfs_info("freeWMap: xaddr:0x%lx xlen:%d", 2553 jfs_info("freeWMap: xaddr:0x%lx xlen:%d",
2554 (ulong) xaddr, xlen); 2554 (ulong) xaddr, xlen);
2555 } 2555 }
2556 } else if (maplock->flag & mlckFREEPXD) { 2556 } else if (maplock->flag & mlckFREEPXD) {
2557 pxdlock = (struct pxd_lock *) maplock; 2557 pxdlock = (struct pxd_lock *) maplock;
2558 xaddr = addressPXD(&pxdlock->pxd); 2558 xaddr = addressPXD(&pxdlock->pxd);
2559 xlen = lengthPXD(&pxdlock->pxd); 2559 xlen = lengthPXD(&pxdlock->pxd);
2560 dbFree(ip, xaddr, (s64) xlen); 2560 dbFree(ip, xaddr, (s64) xlen);
2561 jfs_info("freeWMap: xaddr:0x%lx xlen:%d", 2561 jfs_info("freeWMap: xaddr:0x%lx xlen:%d",
2562 (ulong) xaddr, xlen); 2562 (ulong) xaddr, xlen);
2563 } else { /* (maplock->flag & mlckFREEPXDLIST) */ 2563 } else { /* (maplock->flag & mlckFREEPXDLIST) */
2564 2564
2565 pxdlistlock = (struct xdlistlock *) maplock; 2565 pxdlistlock = (struct xdlistlock *) maplock;
2566 pxd = pxdlistlock->xdlist; 2566 pxd = pxdlistlock->xdlist;
2567 for (n = 0; n < pxdlistlock->count; n++, pxd++) { 2567 for (n = 0; n < pxdlistlock->count; n++, pxd++) {
2568 xaddr = addressPXD(pxd); 2568 xaddr = addressPXD(pxd);
2569 xlen = lengthPXD(pxd); 2569 xlen = lengthPXD(pxd);
2570 dbFree(ip, xaddr, (s64) xlen); 2570 dbFree(ip, xaddr, (s64) xlen);
2571 jfs_info("freeWMap: xaddr:0x%lx xlen:%d", 2571 jfs_info("freeWMap: xaddr:0x%lx xlen:%d",
2572 (ulong) xaddr, xlen); 2572 (ulong) xaddr, xlen);
2573 } 2573 }
2574 } 2574 }
2575 } 2575 }
2576 } 2576 }
2577 2577
2578 /* 2578 /*
2579 * txFreelock() 2579 * txFreelock()
2580 * 2580 *
2581 * function: remove tlock from inode anonymous locklist 2581 * function: remove tlock from inode anonymous locklist
2582 */ 2582 */
2583 void txFreelock(struct inode *ip) 2583 void txFreelock(struct inode *ip)
2584 { 2584 {
2585 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 2585 struct jfs_inode_info *jfs_ip = JFS_IP(ip);
2586 struct tlock *xtlck, *tlck; 2586 struct tlock *xtlck, *tlck;
2587 lid_t xlid = 0, lid; 2587 lid_t xlid = 0, lid;
2588 2588
2589 if (!jfs_ip->atlhead) 2589 if (!jfs_ip->atlhead)
2590 return; 2590 return;
2591 2591
2592 TXN_LOCK(); 2592 TXN_LOCK();
2593 xtlck = (struct tlock *) &jfs_ip->atlhead; 2593 xtlck = (struct tlock *) &jfs_ip->atlhead;
2594 2594
2595 while ((lid = xtlck->next) != 0) { 2595 while ((lid = xtlck->next) != 0) {
2596 tlck = lid_to_tlock(lid); 2596 tlck = lid_to_tlock(lid);
2597 if (tlck->flag & tlckFREELOCK) { 2597 if (tlck->flag & tlckFREELOCK) {
2598 xtlck->next = tlck->next; 2598 xtlck->next = tlck->next;
2599 txLockFree(lid); 2599 txLockFree(lid);
2600 } else { 2600 } else {
2601 xtlck = tlck; 2601 xtlck = tlck;
2602 xlid = lid; 2602 xlid = lid;
2603 } 2603 }
2604 } 2604 }
2605 2605
2606 if (jfs_ip->atlhead) 2606 if (jfs_ip->atlhead)
2607 jfs_ip->atltail = xlid; 2607 jfs_ip->atltail = xlid;
2608 else { 2608 else {
2609 jfs_ip->atltail = 0; 2609 jfs_ip->atltail = 0;
2610 /* 2610 /*
2611 * If inode was on anon_list, remove it 2611 * If inode was on anon_list, remove it
2612 */ 2612 */
2613 list_del_init(&jfs_ip->anon_inode_list); 2613 list_del_init(&jfs_ip->anon_inode_list);
2614 } 2614 }
2615 TXN_UNLOCK(); 2615 TXN_UNLOCK();
2616 } 2616 }
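txFreelock() uses a common index-list idiom: &jfs_ip->atlhead is cast to a fake struct tlock so the list head can be rewritten through the same ->next field as any interior node, which removes the special case for deleting the first element (this works because next is the first member of struct tlock). A self-contained sketch of the same trick:

/* fake-head deletion idiom, as in txFreelock(); 'next' must be the
 * first member so a pointer to the head index acts as a pseudo-node */
struct toy_node {
	unsigned short next;	/* index of successor, 0 ends the list */
	int dead;		/* nodes to drop (cf. tlckFREELOCK) */
};

static void toy_prune(unsigned short *head, struct toy_node *table)
{
	struct toy_node *prev = (struct toy_node *) head;
	unsigned short id;

	while ((id = prev->next) != 0) {
		if (table[id].dead)
			prev->next = table[id].next;	/* unlink */
		else
			prev = &table[id];		/* keep, advance */
	}
}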
2617 2617
2618 /* 2618 /*
2619 * txAbort() 2619 * txAbort()
2620 * 2620 *
2621 * function: abort tx before commit; 2621 * function: abort tx before commit;
2622 * 2622 *
2623 * frees line-locks and segment locks for all 2623 * frees line-locks and segment locks for all
2624 * segments in comdata structure. 2624 * segments in comdata structure.
2625 * Optionally sets state of file-system to FM_DIRTY in super-block. 2625 * Optionally sets state of file-system to FM_DIRTY in super-block.
2626 * log ages of in-memory page frames held by the caller 2626 * log ages of in-memory page frames held by the caller
2627 * are reset to 0 (to avoid log wrap). 2627 * are reset to 0 (to avoid log wrap).
2628 */ 2628 */
2629 void txAbort(tid_t tid, int dirty) 2629 void txAbort(tid_t tid, int dirty)
2630 { 2630 {
2631 lid_t lid, next; 2631 lid_t lid, next;
2632 struct metapage *mp; 2632 struct metapage *mp;
2633 struct tblock *tblk = tid_to_tblock(tid); 2633 struct tblock *tblk = tid_to_tblock(tid);
2634 struct tlock *tlck; 2634 struct tlock *tlck;
2635 2635
2636 /* 2636 /*
2637 * free tlocks of the transaction 2637 * free tlocks of the transaction
2638 */ 2638 */
2639 for (lid = tblk->next; lid; lid = next) { 2639 for (lid = tblk->next; lid; lid = next) {
2640 tlck = lid_to_tlock(lid); 2640 tlck = lid_to_tlock(lid);
2641 next = tlck->next; 2641 next = tlck->next;
2642 mp = tlck->mp; 2642 mp = tlck->mp;
2643 JFS_IP(tlck->ip)->xtlid = 0; 2643 JFS_IP(tlck->ip)->xtlid = 0;
2644 2644
2645 if (mp) { 2645 if (mp) {
2646 mp->lid = 0; 2646 mp->lid = 0;
2647 2647
2648 /* 2648 /*
2649 * reset lsn of page to avoid log wrap: 2649 * reset lsn of page to avoid log wrap:
2650 * 2650 *
2651 * (page may have been previously committed by another 2651 * (page may have been previously committed by another
2652 * transaction(s) but has not been paged, i.e., 2652 * transaction(s) but has not been paged, i.e.,
2653 * it may be on logsync list even though it has not 2653 * it may be on logsync list even though it has not
2654 * been logged for the current tx.) 2654 * been logged for the current tx.)
2655 */ 2655 */
2656 if (mp->xflag & COMMIT_PAGE && mp->lsn) 2656 if (mp->xflag & COMMIT_PAGE && mp->lsn)
2657 LogSyncRelease(mp); 2657 LogSyncRelease(mp);
2658 } 2658 }
2659 /* insert tlock at head of freelist */ 2659 /* insert tlock at head of freelist */
2660 TXN_LOCK(); 2660 TXN_LOCK();
2661 txLockFree(lid); 2661 txLockFree(lid);
2662 TXN_UNLOCK(); 2662 TXN_UNLOCK();
2663 } 2663 }
2664 2664
2665 /* caller will free the transaction block */ 2665 /* caller will free the transaction block */
2666 2666
2667 tblk->next = tblk->last = 0; 2667 tblk->next = tblk->last = 0;
2668 2668
2669 /* 2669 /*
2670 * mark filesystem dirty 2670 * mark filesystem dirty
2671 */ 2671 */
2672 if (dirty) 2672 if (dirty)
2673 jfs_error(tblk->sb, "txAbort"); 2673 jfs_error(tblk->sb, "txAbort");
2674 2674
2675 return; 2675 return;
2676 } 2676 }
2677 2677
2678 /* 2678 /*
2679 * txLazyCommit(void) 2679 * txLazyCommit(void)
2680 * 2680 *
2681 * All transactions except those changing ipimap (COMMIT_FORCE) are 2681 * All transactions except those changing ipimap (COMMIT_FORCE) are
2682 * processed by this routine. This ensures that the inode and block 2682 * processed by this routine. This ensures that the inode and block
2683 * allocation maps are updated in order. For synchronous transactions, 2683 * allocation maps are updated in order. For synchronous transactions,
2684 * let the user thread finish processing after txUpdateMap() is called. 2684 * let the user thread finish processing after txUpdateMap() is called.
2685 */ 2685 */
2686 static void txLazyCommit(struct tblock * tblk) 2686 static void txLazyCommit(struct tblock * tblk)
2687 { 2687 {
2688 struct jfs_log *log; 2688 struct jfs_log *log;
2689 2689
2690 while (((tblk->flag & tblkGC_READY) == 0) && 2690 while (((tblk->flag & tblkGC_READY) == 0) &&
2691 ((tblk->flag & tblkGC_UNLOCKED) == 0)) { 2691 ((tblk->flag & tblkGC_UNLOCKED) == 0)) {
2692 /* We must have gotten ahead of the user thread 2692 /* We must have gotten ahead of the user thread
2693 */ 2693 */
2694 jfs_info("jfs_lazycommit: tblk 0x%p not unlocked", tblk); 2694 jfs_info("jfs_lazycommit: tblk 0x%p not unlocked", tblk);
2695 yield(); 2695 yield();
2696 } 2696 }
2697 2697
2698 jfs_info("txLazyCommit: processing tblk 0x%p", tblk); 2698 jfs_info("txLazyCommit: processing tblk 0x%p", tblk);
2699 2699
2700 txUpdateMap(tblk); 2700 txUpdateMap(tblk);
2701 2701
2702 log = (struct jfs_log *) JFS_SBI(tblk->sb)->log; 2702 log = (struct jfs_log *) JFS_SBI(tblk->sb)->log;
2703 2703
2704 spin_lock_irq(&log->gclock); // LOGGC_LOCK 2704 spin_lock_irq(&log->gclock); // LOGGC_LOCK
2705 2705
2706 tblk->flag |= tblkGC_COMMITTED; 2706 tblk->flag |= tblkGC_COMMITTED;
2707 2707
2708 if (tblk->flag & tblkGC_READY) 2708 if (tblk->flag & tblkGC_READY)
2709 log->gcrtc--; 2709 log->gcrtc--;
2710 2710
2711 wake_up_all(&tblk->gcwait); // LOGGC_WAKEUP 2711 wake_up_all(&tblk->gcwait); // LOGGC_WAKEUP
2712 2712
2713 /* 2713 /*
2714 * Can't release log->gclock until we've tested tblk->flag 2714 * Can't release log->gclock until we've tested tblk->flag
2715 */ 2715 */
2716 if (tblk->flag & tblkGC_LAZY) { 2716 if (tblk->flag & tblkGC_LAZY) {
2717 spin_unlock_irq(&log->gclock); // LOGGC_UNLOCK 2717 spin_unlock_irq(&log->gclock); // LOGGC_UNLOCK
2718 txUnlock(tblk); 2718 txUnlock(tblk);
2719 tblk->flag &= ~tblkGC_LAZY; 2719 tblk->flag &= ~tblkGC_LAZY;
2720 txEnd(tblk - TxBlock); /* Convert back to tid */ 2720 txEnd(tblk - TxBlock); /* Convert back to tid */
2721 } else 2721 } else
2722 spin_unlock_irq(&log->gclock); // LOGGC_UNLOCK 2722 spin_unlock_irq(&log->gclock); // LOGGC_UNLOCK
2723 2723
2724 jfs_info("txLazyCommit: done: tblk = 0x%p", tblk); 2724 jfs_info("txLazyCommit: done: tblk = 0x%p", tblk);
2725 } 2725 }
2726 2726
2727 /* 2727 /*
2728 * jfs_lazycommit(void) 2728 * jfs_lazycommit(void)
2729 * 2729 *
2730 * To be run as a kernel daemon. If lbmIODone is called in an interrupt 2730 * To be run as a kernel daemon. If lbmIODone is called in an interrupt
2731 * context, or where blocking is not wanted, this routine will process 2731 * context, or where blocking is not wanted, this routine will process
2732 * committed transactions from the unlock queue. 2732 * committed transactions from the unlock queue.
2733 */ 2733 */
2734 int jfs_lazycommit(void *arg) 2734 int jfs_lazycommit(void *arg)
2735 { 2735 {
2736 int WorkDone; 2736 int WorkDone;
2737 struct tblock *tblk; 2737 struct tblock *tblk;
2738 unsigned long flags; 2738 unsigned long flags;
2739 struct jfs_sb_info *sbi; 2739 struct jfs_sb_info *sbi;
2740 2740
2741 daemonize("jfsCommit"); 2741 daemonize("jfsCommit");
2742 2742
2743 complete(&jfsIOwait); 2743 complete(&jfsIOwait);
2744 2744
2745 do { 2745 do {
2746 LAZY_LOCK(flags); 2746 LAZY_LOCK(flags);
2747 jfs_commit_thread_waking = 0; /* OK to wake another thread */ 2747 jfs_commit_thread_waking = 0; /* OK to wake another thread */
2748 while (!list_empty(&TxAnchor.unlock_queue)) { 2748 while (!list_empty(&TxAnchor.unlock_queue)) {
2749 WorkDone = 0; 2749 WorkDone = 0;
2750 list_for_each_entry(tblk, &TxAnchor.unlock_queue, 2750 list_for_each_entry(tblk, &TxAnchor.unlock_queue,
2751 cqueue) { 2751 cqueue) {
2752 2752
2753 sbi = JFS_SBI(tblk->sb); 2753 sbi = JFS_SBI(tblk->sb);
2754 /* 2754 /*
2755 * For each volume, the transactions must be 2755 * For each volume, the transactions must be
2756 * handled in order. If another commit thread 2756 * handled in order. If another commit thread
2757 * is handling a tblk for this superblock, 2757 * is handling a tblk for this superblock,
2758 * skip it 2758 * skip it
2759 */ 2759 */
2760 if (sbi->commit_state & IN_LAZYCOMMIT) 2760 if (sbi->commit_state & IN_LAZYCOMMIT)
2761 continue; 2761 continue;
2762 2762
2763 sbi->commit_state |= IN_LAZYCOMMIT; 2763 sbi->commit_state |= IN_LAZYCOMMIT;
2764 WorkDone = 1; 2764 WorkDone = 1;
2765 2765
2766 /* 2766 /*
2767 * Remove transaction from queue 2767 * Remove transaction from queue
2768 */ 2768 */
2769 list_del(&tblk->cqueue); 2769 list_del(&tblk->cqueue);
2770 2770
2771 LAZY_UNLOCK(flags); 2771 LAZY_UNLOCK(flags);
2772 txLazyCommit(tblk); 2772 txLazyCommit(tblk);
2773 LAZY_LOCK(flags); 2773 LAZY_LOCK(flags);
2774 2774
2775 sbi->commit_state &= ~IN_LAZYCOMMIT; 2775 sbi->commit_state &= ~IN_LAZYCOMMIT;
2776 /* 2776 /*
2777 * Don't continue in the for loop. (We can't 2777 * Don't continue in the for loop. (We can't
2778 * anyway, it's unsafe!) We want to go back to 2778 * anyway, it's unsafe!) We want to go back to
2779 * the beginning of the list. 2779 * the beginning of the list.
2780 */ 2780 */
2781 break; 2781 break;
2782 } 2782 }
2783 2783
2784 /* If there was nothing to do, don't continue */ 2784 /* If there was nothing to do, don't continue */
2785 if (!WorkDone) 2785 if (!WorkDone)
2786 break; 2786 break;
2787 } 2787 }
2788 /* In case a wakeup came while all threads were active */ 2788 /* In case a wakeup came while all threads were active */
2789 jfs_commit_thread_waking = 0; 2789 jfs_commit_thread_waking = 0;
2790 2790
2791 if (freezing(current)) { 2791 if (freezing(current)) {
2792 LAZY_UNLOCK(flags); 2792 LAZY_UNLOCK(flags);
2793 refrigerator(); 2793 refrigerator();
2794 } else { 2794 } else {
2795 DECLARE_WAITQUEUE(wq, current); 2795 DECLARE_WAITQUEUE(wq, current);
2796 2796
2797 add_wait_queue(&jfs_commit_thread_wait, &wq); 2797 add_wait_queue(&jfs_commit_thread_wait, &wq);
2798 set_current_state(TASK_INTERRUPTIBLE); 2798 set_current_state(TASK_INTERRUPTIBLE);
2799 LAZY_UNLOCK(flags); 2799 LAZY_UNLOCK(flags);
2800 schedule(); 2800 schedule();
2801 current->state = TASK_RUNNING; 2801 current->state = TASK_RUNNING;
2802 remove_wait_queue(&jfs_commit_thread_wait, &wq); 2802 remove_wait_queue(&jfs_commit_thread_wait, &wq);
2803 } 2803 }
2804 } while (!jfs_stop_threads); 2804 } while (!jfs_stop_threads);
2805 2805
2806 if (!list_empty(&TxAnchor.unlock_queue)) 2806 if (!list_empty(&TxAnchor.unlock_queue))
2807 jfs_err("jfs_lazycommit being killed w/pending transactions!"); 2807 jfs_err("jfs_lazycommit being killed w/pending transactions!");
2808 else 2808 else
2809 jfs_info("jfs_lazycommit being killed\n"); 2809 jfs_info("jfs_lazycommit being killed\n");
2810 complete_and_exit(&jfsIOwait, 0); 2810 complete_and_exit(&jfsIOwait, 0);
2811 } 2811 }
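The inner loop of jfs_lazycommit() is a drop-lock-and-rescan pattern: the anchor lock must be released around txLazyCommit(), which may sleep, and once the lock has been dropped the list_for_each_entry() cursor is stale, so the loop breaks and rescans from the head of the queue. A skeleton of that shape, where struct work_item, item_busy(), mark_busy(), mark_idle() and process() are hypothetical stand-ins for the tblock, the commit_state checks and txLazyCommit():

/* skeleton of the drop-lock-and-rescan pattern; names are illustrative */
static void drain_queue(spinlock_t *lock, struct list_head *queue)
{
	struct work_item *item;
	unsigned long flags;

	spin_lock_irqsave(lock, flags);
	while (!list_empty(queue)) {
		int progressed = 0;

		list_for_each_entry(item, queue, link) {
			if (item_busy(item))	/* another worker owns it */
				continue;
			mark_busy(item);
			list_del(&item->link);

			spin_unlock_irqrestore(lock, flags);
			process(item);		/* may sleep */
			spin_lock_irqsave(lock, flags);

			mark_idle(item);
			progressed = 1;
			break;	/* cursor is stale: rescan from the head */
		}
		if (!progressed)	/* everything left is busy */
			break;
	}
	spin_unlock_irqrestore(lock, flags);
}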
2812 2812
2813 void txLazyUnlock(struct tblock * tblk) 2813 void txLazyUnlock(struct tblock * tblk)
2814 { 2814 {
2815 unsigned long flags; 2815 unsigned long flags;
2816 2816
2817 LAZY_LOCK(flags); 2817 LAZY_LOCK(flags);
2818 2818
2819 list_add_tail(&tblk->cqueue, &TxAnchor.unlock_queue); 2819 list_add_tail(&tblk->cqueue, &TxAnchor.unlock_queue);
2820 /* 2820 /*
2821 * Don't wake up a commit thread if there is already one servicing 2821 * Don't wake up a commit thread if there is already one servicing
2822 * this superblock, or if the last one we woke up hasn't started yet. 2822 * this superblock, or if the last one we woke up hasn't started yet.
2823 */ 2823 */
2824 if (!(JFS_SBI(tblk->sb)->commit_state & IN_LAZYCOMMIT) && 2824 if (!(JFS_SBI(tblk->sb)->commit_state & IN_LAZYCOMMIT) &&
2825 !jfs_commit_thread_waking) { 2825 !jfs_commit_thread_waking) {
2826 jfs_commit_thread_waking = 1; 2826 jfs_commit_thread_waking = 1;
2827 wake_up(&jfs_commit_thread_wait); 2827 wake_up(&jfs_commit_thread_wait);
2828 } 2828 }
2829 LAZY_UNLOCK(flags); 2829 LAZY_UNLOCK(flags);
2830 } 2830 }
2831 2831
2832 static void LogSyncRelease(struct metapage * mp) 2832 static void LogSyncRelease(struct metapage * mp)
2833 { 2833 {
2834 struct jfs_log *log = mp->log; 2834 struct jfs_log *log = mp->log;
2835 2835
2836 assert(mp->nohomeok); 2836 assert(mp->nohomeok);
2837 assert(log); 2837 assert(log);
2838 metapage_homeok(mp); 2838 metapage_homeok(mp);
2839 } 2839 }
2840 2840
2841 /* 2841 /*
2842 * txQuiesce 2842 * txQuiesce
2843 * 2843 *
2844 * Block all new transactions and push anonymous transactions to 2844 * Block all new transactions and push anonymous transactions to
2845 * completion 2845 * completion
2846 * 2846 *
2847 * This does almost the same thing as jfs_sync below. We don't 2847 * This does almost the same thing as jfs_sync below. We don't
2848 * worry about deadlocking when jfs_tlocks_low is set, since we would 2848 * worry about deadlocking when jfs_tlocks_low is set, since we would
2849 * expect jfs_sync to get us out of that jam. 2849 * expect jfs_sync to get us out of that jam.
2850 */ 2850 */
2851 void txQuiesce(struct super_block *sb) 2851 void txQuiesce(struct super_block *sb)
2852 { 2852 {
2853 struct inode *ip; 2853 struct inode *ip;
2854 struct jfs_inode_info *jfs_ip; 2854 struct jfs_inode_info *jfs_ip;
2855 struct jfs_log *log = JFS_SBI(sb)->log; 2855 struct jfs_log *log = JFS_SBI(sb)->log;
2856 tid_t tid; 2856 tid_t tid;
2857 2857
2858 set_bit(log_QUIESCE, &log->flag); 2858 set_bit(log_QUIESCE, &log->flag);
2859 2859
2860 TXN_LOCK(); 2860 TXN_LOCK();
2861 restart: 2861 restart:
2862 while (!list_empty(&TxAnchor.anon_list)) { 2862 while (!list_empty(&TxAnchor.anon_list)) {
2863 jfs_ip = list_entry(TxAnchor.anon_list.next, 2863 jfs_ip = list_entry(TxAnchor.anon_list.next,
2864 struct jfs_inode_info, 2864 struct jfs_inode_info,
2865 anon_inode_list); 2865 anon_inode_list);
2866 ip = &jfs_ip->vfs_inode; 2866 ip = &jfs_ip->vfs_inode;
2867 2867
2868 /* 2868 /*
2869 * inode will be removed from anonymous list 2869 * inode will be removed from anonymous list
2870 * when it is committed 2870 * when it is committed
2871 */ 2871 */
2872 TXN_UNLOCK(); 2872 TXN_UNLOCK();
2873 tid = txBegin(ip->i_sb, COMMIT_INODE | COMMIT_FORCE); 2873 tid = txBegin(ip->i_sb, COMMIT_INODE | COMMIT_FORCE);
2874 down(&jfs_ip->commit_sem); 2874 down(&jfs_ip->commit_sem);
2875 txCommit(tid, 1, &ip, 0); 2875 txCommit(tid, 1, &ip, 0);
2876 txEnd(tid); 2876 txEnd(tid);
2877 up(&jfs_ip->commit_sem); 2877 up(&jfs_ip->commit_sem);
2878 /* 2878 /*
2879 * Just to be safe. I don't know how 2879 * Just to be safe. I don't know how
2880 * long we can run without blocking 2880 * long we can run without blocking
2881 */ 2881 */
2882 cond_resched(); 2882 cond_resched();
2883 TXN_LOCK(); 2883 TXN_LOCK();
2884 } 2884 }
2885 2885
2886 /* 2886 /*
2887 * If jfs_sync is running in parallel, there could be some inodes 2887 * If jfs_sync is running in parallel, there could be some inodes
2888 * on anon_list2. Let's check. 2888 * on anon_list2. Let's check.
2889 */ 2889 */
2890 if (!list_empty(&TxAnchor.anon_list2)) { 2890 if (!list_empty(&TxAnchor.anon_list2)) {
2891 list_splice(&TxAnchor.anon_list2, &TxAnchor.anon_list); 2891 list_splice(&TxAnchor.anon_list2, &TxAnchor.anon_list);
2892 INIT_LIST_HEAD(&TxAnchor.anon_list2); 2892 INIT_LIST_HEAD(&TxAnchor.anon_list2);
2893 goto restart; 2893 goto restart;
2894 } 2894 }
2895 TXN_UNLOCK(); 2895 TXN_UNLOCK();
2896 2896
2897 /* 2897 /*
2898 * We may need to kick off the group commit 2898 * We may need to kick off the group commit
2899 */ 2899 */
2900 jfs_flush_journal(log, 0); 2900 jfs_flush_journal(log, 0);
2901 } 2901 }
2902 2902
2903 /* 2903 /*
2904 * txResume() 2904 * txResume()
2905 * 2905 *
2906 * Allows transactions to start again following txQuiesce 2906 * Allows transactions to start again following txQuiesce
2907 */ 2907 */
2908 void txResume(struct super_block *sb) 2908 void txResume(struct super_block *sb)
2909 { 2909 {
2910 struct jfs_log *log = JFS_SBI(sb)->log; 2910 struct jfs_log *log = JFS_SBI(sb)->log;
2911 2911
2912 clear_bit(log_QUIESCE, &log->flag); 2912 clear_bit(log_QUIESCE, &log->flag);
2913 TXN_WAKEUP(&log->syncwait); 2913 TXN_WAKEUP(&log->syncwait);
2914 } 2914 }
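Together, txQuiesce() and txResume() bracket a window in which no new transactions can start (log_QUIESCE is set) and all anonymous transactions have been committed and flushed; this is the state needed by operations such as freezing or extending the volume. A minimal usage sketch, with do_offline_work() as a hypothetical placeholder:

/* illustrative bracket around a transaction-free window */
static int with_quiesced_fs(struct super_block *sb)
{
	int rc;

	txQuiesce(sb);			/* block new tx, drain anonymous tx */
	rc = do_offline_work(sb);	/* hypothetical */
	txResume(sb);			/* clear log_QUIESCE, wake waiters */

	return rc;
}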
2915 2915
2916 /* 2916 /*
2917 * jfs_sync(void) 2917 * jfs_sync(void)
2918 * 2918 *
2919 * To be run as a kernel daemon. This is awakened when tlocks run low. 2919 * To be run as a kernel daemon. This is awakened when tlocks run low.
2920 * We write any inodes that have anonymous tlocks so they will become 2920 * We write any inodes that have anonymous tlocks so they will become
2921 * available. 2921 * available.
2922 */ 2922 */
2923 int jfs_sync(void *arg) 2923 int jfs_sync(void *arg)
2924 { 2924 {
2925 struct inode *ip; 2925 struct inode *ip;
2926 struct jfs_inode_info *jfs_ip; 2926 struct jfs_inode_info *jfs_ip;
2927 int rc; 2927 int rc;
2928 tid_t tid; 2928 tid_t tid;
2929 2929
2930 daemonize("jfsSync"); 2930 daemonize("jfsSync");
2931 2931
2932 complete(&jfsIOwait); 2932 complete(&jfsIOwait);
2933 2933
2934 do { 2934 do {
2935 /* 2935 /*
2936 * write each inode on the anonymous inode list 2936 * write each inode on the anonymous inode list
2937 */ 2937 */
2938 TXN_LOCK(); 2938 TXN_LOCK();
2939 while (jfs_tlocks_low && !list_empty(&TxAnchor.anon_list)) { 2939 while (jfs_tlocks_low && !list_empty(&TxAnchor.anon_list)) {
2940 jfs_ip = list_entry(TxAnchor.anon_list.next, 2940 jfs_ip = list_entry(TxAnchor.anon_list.next,
2941 struct jfs_inode_info, 2941 struct jfs_inode_info,
2942 anon_inode_list); 2942 anon_inode_list);
2943 ip = &jfs_ip->vfs_inode; 2943 ip = &jfs_ip->vfs_inode;
2944 2944
2945 if (! igrab(ip)) { 2945 if (! igrab(ip)) {
2946 /* 2946 /*
2947 * Inode is being freed 2947 * Inode is being freed
2948 */ 2948 */
2949 list_del_init(&jfs_ip->anon_inode_list); 2949 list_del_init(&jfs_ip->anon_inode_list);
2950 } else if (! down_trylock(&jfs_ip->commit_sem)) { 2950 } else if (! down_trylock(&jfs_ip->commit_sem)) {
2951 /* 2951 /*
2952 * inode will be removed from anonymous list 2952 * inode will be removed from anonymous list
2953 * when it is committed 2953 * when it is committed
2954 */ 2954 */
2955 TXN_UNLOCK(); 2955 TXN_UNLOCK();
2956 tid = txBegin(ip->i_sb, COMMIT_INODE); 2956 tid = txBegin(ip->i_sb, COMMIT_INODE);
2957 rc = txCommit(tid, 1, &ip, 0); 2957 rc = txCommit(tid, 1, &ip, 0);
2958 txEnd(tid); 2958 txEnd(tid);
2959 up(&jfs_ip->commit_sem); 2959 up(&jfs_ip->commit_sem);
2960 2960
2961 iput(ip); 2961 iput(ip);
2962 /* 2962 /*
2963 * Just to be safe. I don't know how 2963 * Just to be safe. I don't know how
2964 * long we can run without blocking 2964 * long we can run without blocking
2965 */ 2965 */
2966 cond_resched(); 2966 cond_resched();
2967 TXN_LOCK(); 2967 TXN_LOCK();
2968 } else { 2968 } else {
2969 /* We can't get the commit semaphore. It may 2969 /* We can't get the commit semaphore. It may
2970 * be held by a thread waiting for tlocks 2970 * be held by a thread waiting for tlocks
2971 * so let's not block here. Save it to 2971 * so let's not block here. Save it to
2972 * put back on the anon_list. 2972 * put back on the anon_list.
2973 */ 2973 */
2974 2974
2975 /* Take off anon_list */ 2975 /* Take off anon_list */
2976 list_del(&jfs_ip->anon_inode_list); 2976 list_del(&jfs_ip->anon_inode_list);
2977 2977
2978 /* Put on anon_list2 */ 2978 /* Put on anon_list2 */
2979 list_add(&jfs_ip->anon_inode_list, 2979 list_add(&jfs_ip->anon_inode_list,
2980 &TxAnchor.anon_list2); 2980 &TxAnchor.anon_list2);
2981 2981
2982 TXN_UNLOCK(); 2982 TXN_UNLOCK();
2983 iput(ip); 2983 iput(ip);
2984 TXN_LOCK(); 2984 TXN_LOCK();
2985 } 2985 }
2986 } 2986 }
2987 /* Add anon_list2 back to anon_list */ 2987 /* Add anon_list2 back to anon_list */
2988 list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list); 2988 list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list);
2989 2989
2990 if (freezing(current)) { 2990 if (freezing(current)) {
2991 TXN_UNLOCK(); 2991 TXN_UNLOCK();
2992 refrigerator(); 2992 refrigerator();
2993 } else { 2993 } else {
2994 DECLARE_WAITQUEUE(wq, current); 2994 DECLARE_WAITQUEUE(wq, current);
2995 2995
2996 add_wait_queue(&jfs_sync_thread_wait, &wq); 2996 add_wait_queue(&jfs_sync_thread_wait, &wq);
2997 set_current_state(TASK_INTERRUPTIBLE); 2997 set_current_state(TASK_INTERRUPTIBLE);
2998 TXN_UNLOCK(); 2998 TXN_UNLOCK();
2999 schedule(); 2999 schedule();
3000 current->state = TASK_RUNNING; 3000 current->state = TASK_RUNNING;
3001 remove_wait_queue(&jfs_sync_thread_wait, &wq); 3001 remove_wait_queue(&jfs_sync_thread_wait, &wq);
3002 } 3002 }
3003 } while (!jfs_stop_threads); 3003 } while (!jfs_stop_threads);
3004 3004
3005 jfs_info("jfs_sync being killed"); 3005 jfs_info("jfs_sync being killed");
3006 complete_and_exit(&jfsIOwait, 0); 3006 complete_and_exit(&jfsIOwait, 0);
3007 } 3007 }
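jfs_sync() deliberately never blocks on commit_sem: when down_trylock() fails, the inode is parked on anon_list2 and the scan continues, and anon_list2 is spliced back afterwards so a later pass retries those inodes. The general try-lock-or-defer shape, using the real list primitives but hypothetical try_claim()/handle()/release() helpers:

/* sketch of the try-lock-or-defer scan used by jfs_sync() */
static void scan_with_deferral(struct list_head *work,
			       struct list_head *deferred)
{
	struct toy_item *it;	/* hypothetical element type */

	while (!list_empty(work)) {
		it = list_entry(work->next, struct toy_item, link);

		if (try_claim(it)) {
			list_del_init(&it->link);	/* off the work list */
			handle(it);
			release(it);
		} else {
			/* must not block here: park it and move on */
			list_move(&it->link, deferred);
		}
	}
	/* put deferred items back so the next pass retries them */
	list_splice_init(deferred, work);
}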
3008 3008
3009 #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_DEBUG) 3009 #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_DEBUG)
3010 int jfs_txanchor_read(char *buffer, char **start, off_t offset, int length, 3010 int jfs_txanchor_read(char *buffer, char **start, off_t offset, int length,
3011 int *eof, void *data) 3011 int *eof, void *data)
3012 { 3012 {
3013 int len = 0; 3013 int len = 0;
3014 off_t begin; 3014 off_t begin;
3015 char *freewait; 3015 char *freewait;
3016 char *freelockwait; 3016 char *freelockwait;
3017 char *lowlockwait; 3017 char *lowlockwait;
3018 3018
3019 freewait = 3019 freewait =
3020 waitqueue_active(&TxAnchor.freewait) ? "active" : "empty"; 3020 waitqueue_active(&TxAnchor.freewait) ? "active" : "empty";
3021 freelockwait = 3021 freelockwait =
3022 waitqueue_active(&TxAnchor.freelockwait) ? "active" : "empty"; 3022 waitqueue_active(&TxAnchor.freelockwait) ? "active" : "empty";
3023 lowlockwait = 3023 lowlockwait =
3024 waitqueue_active(&TxAnchor.lowlockwait) ? "active" : "empty"; 3024 waitqueue_active(&TxAnchor.lowlockwait) ? "active" : "empty";
3025 3025
3026 len += sprintf(buffer, 3026 len += sprintf(buffer,
3027 "JFS TxAnchor\n" 3027 "JFS TxAnchor\n"
3028 "============\n" 3028 "============\n"
3029 "freetid = %d\n" 3029 "freetid = %d\n"
3030 "freewait = %s\n" 3030 "freewait = %s\n"
3031 "freelock = %d\n" 3031 "freelock = %d\n"
3032 "freelockwait = %s\n" 3032 "freelockwait = %s\n"
3033 "lowlockwait = %s\n" 3033 "lowlockwait = %s\n"
3034 "tlocksInUse = %d\n" 3034 "tlocksInUse = %d\n"
3035 "jfs_tlocks_low = %d\n" 3035 "jfs_tlocks_low = %d\n"
3036 "unlock_queue is %sempty\n", 3036 "unlock_queue is %sempty\n",
3037 TxAnchor.freetid, 3037 TxAnchor.freetid,
3038 freewait, 3038 freewait,
3039 TxAnchor.freelock, 3039 TxAnchor.freelock,
3040 freelockwait, 3040 freelockwait,
3041 lowlockwait, 3041 lowlockwait,
3042 TxAnchor.tlocksInUse, 3042 TxAnchor.tlocksInUse,
3043 jfs_tlocks_low, 3043 jfs_tlocks_low,
3044 list_empty(&TxAnchor.unlock_queue) ? "" : "not "); 3044 list_empty(&TxAnchor.unlock_queue) ? "" : "not ");
3045 3045
3046 begin = offset; 3046 begin = offset;
3047 *start = buffer + begin; 3047 *start = buffer + begin;
3048 len -= begin; 3048 len -= begin;
3049 3049
3050 if (len > length) 3050 if (len > length)
3051 len = length; 3051 len = length;
3052 else 3052 else
3053 *eof = 1; 3053 *eof = 1;
3054 3054
3055 if (len < 0) 3055 if (len < 0)
3056 len = 0; 3056 len = 0;
3057 3057
3058 return len; 3058 return len;
3059 } 3059 }
3060 #endif 3060 #endif
3061 3061
3062 #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_STATISTICS) 3062 #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_STATISTICS)
3063 int jfs_txstats_read(char *buffer, char **start, off_t offset, int length, 3063 int jfs_txstats_read(char *buffer, char **start, off_t offset, int length,
3064 int *eof, void *data) 3064 int *eof, void *data)
3065 { 3065 {
3066 int len = 0; 3066 int len = 0;
3067 off_t begin; 3067 off_t begin;
3068 3068
3069 len += sprintf(buffer, 3069 len += sprintf(buffer,
3070 "JFS TxStats\n" 3070 "JFS TxStats\n"
3071 "===========\n" 3071 "===========\n"
3072 "calls to txBegin = %d\n" 3072 "calls to txBegin = %d\n"
3073 "txBegin blocked by sync barrier = %d\n" 3073 "txBegin blocked by sync barrier = %d\n"
3074 "txBegin blocked by tlocks low = %d\n" 3074 "txBegin blocked by tlocks low = %d\n"
3075 "txBegin blocked by no free tid = %d\n" 3075 "txBegin blocked by no free tid = %d\n"
3076 "calls to txBeginAnon = %d\n" 3076 "calls to txBeginAnon = %d\n"
3077 "txBeginAnon blocked by sync barrier = %d\n" 3077 "txBeginAnon blocked by sync barrier = %d\n"
3078 "txBeginAnon blocked by tlocks low = %d\n" 3078 "txBeginAnon blocked by tlocks low = %d\n"
3079 "calls to txLockAlloc = %d\n" 3079 "calls to txLockAlloc = %d\n"
3080 "tLockAlloc blocked by no free lock = %d\n", 3080 "tLockAlloc blocked by no free lock = %d\n",
3081 TxStat.txBegin, 3081 TxStat.txBegin,
3082 TxStat.txBegin_barrier, 3082 TxStat.txBegin_barrier,
3083 TxStat.txBegin_lockslow, 3083 TxStat.txBegin_lockslow,
3084 TxStat.txBegin_freetid, 3084 TxStat.txBegin_freetid,
3085 TxStat.txBeginAnon, 3085 TxStat.txBeginAnon,
3086 TxStat.txBeginAnon_barrier, 3086 TxStat.txBeginAnon_barrier,
3087 TxStat.txBeginAnon_lockslow, 3087 TxStat.txBeginAnon_lockslow,
3088 TxStat.txLockAlloc, 3088 TxStat.txLockAlloc,
3089 TxStat.txLockAlloc_freelock); 3089 TxStat.txLockAlloc_freelock);
3090 3090
3091 begin = offset; 3091 begin = offset;
3092 *start = buffer + begin; 3092 *start = buffer + begin;
3093 len -= begin; 3093 len -= begin;
3094 3094
3095 if (len > length) 3095 if (len > length)
3096 len = length; 3096 len = length;
3097 else 3097 else
3098 *eof = 1; 3098 *eof = 1;
3099 3099
3100 if (len < 0) 3100 if (len < 0)
3101 len = 0; 3101 len = 0;
3102 3102
3103 return len; 3103 return len;
3104 } 3104 }
3105 #endif 3105 #endif
1 /*
2  * Copyright (C) International Business Machines Corp., 2000-2004
3  * Portions Copyright (C) Christoph Hellwig, 2001-2002
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
13  * the GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18  */
19 
20 #include <linux/fs.h>
21 #include <linux/config.h>
22 #include <linux/module.h>
23 #include <linux/parser.h>
24 #include <linux/completion.h>
25 #include <linux/vfs.h>
26 #include <linux/moduleparam.h>
27 #include <linux/posix_acl.h>
28 #include <asm/uaccess.h>
29 
30 #include "jfs_incore.h"
31 #include "jfs_filsys.h"
32 #include "jfs_inode.h"
33 #include "jfs_metapage.h"
34 #include "jfs_superblock.h"
35 #include "jfs_dmap.h"
36 #include "jfs_imap.h"
37 #include "jfs_acl.h"
38 #include "jfs_debug.h"
39 
40 MODULE_DESCRIPTION("The Journaled Filesystem (JFS)");
41 MODULE_AUTHOR("Steve Best/Dave Kleikamp/Barry Arndt, IBM");
42 MODULE_LICENSE("GPL");
43 
44 static kmem_cache_t *jfs_inode_cachep;
45 
46 static struct super_operations jfs_super_operations;
47 static struct export_operations jfs_export_operations;
48 static struct file_system_type jfs_fs_type;
49 
50 #define MAX_COMMIT_THREADS 64
51 static int commit_threads = 0;
52 module_param(commit_threads, int, 0);
53 MODULE_PARM_DESC(commit_threads, "Number of commit threads");
54 
55 int jfs_stop_threads;
56 static pid_t jfsIOthread;
57 static pid_t jfsCommitThread[MAX_COMMIT_THREADS];
58 static pid_t jfsSyncThread;
59 DECLARE_COMPLETION(jfsIOwait);
60 
61 #ifdef CONFIG_JFS_DEBUG
62 int jfsloglevel = JFS_LOGLEVEL_WARN;
63 module_param(jfsloglevel, int, 0644);
64 MODULE_PARM_DESC(jfsloglevel, "Specify JFS loglevel (0, 1 or 2)");
65 #endif
66 
67 static void jfs_handle_error(struct super_block *sb)
68 {
69 	struct jfs_sb_info *sbi = JFS_SBI(sb);
70 
71 	if (sb->s_flags & MS_RDONLY)
72 		return;
73 
74 	updateSuper(sb, FM_DIRTY);
75 
76 	if (sbi->flag & JFS_ERR_PANIC)
77 		panic("JFS (device %s): panic forced after error\n",
78 		      sb->s_id);
79 	else if (sbi->flag & JFS_ERR_REMOUNT_RO) {
80 		jfs_err("ERROR: (device %s): remounting filesystem "
81 			"as read-only\n",
82 			sb->s_id);
83 		sb->s_flags |= MS_RDONLY;
84 	}
85 
86 	/* nothing is done for continue beyond marking the superblock dirty */
87 }
88 
89 void jfs_error(struct super_block *sb, const char *function, ...)
90 {
91 	static char error_buf[256];
92 	va_list args;
93 
94 	va_start(args, function);
95 	vsprintf(error_buf, function, args);
96 	va_end(args);
97 
98 	printk(KERN_ERR "ERROR: (device %s): %s\n", sb->s_id, error_buf);
99 
100 	jfs_handle_error(sb);
101 }
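
One caution worth noting: jfs_error()'s second parameter is named `function` but is actually used as the printk-style format string — it is passed straight to vsprintf(), with no length guard on the 256-byte static buffer — so callers conventionally bake the function name into the format. A hypothetical call site:

	/* hypothetical call site; the message text is illustrative only */
	if (blkno >= sbi->bmap->db_mapsize)
		jfs_error(sb, "dbAlloc: block %lld out of range",
			  (long long) blkno);
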
102 
103 static struct inode *jfs_alloc_inode(struct super_block *sb)
104 {
105 	struct jfs_inode_info *jfs_inode;
106 
107 	jfs_inode = kmem_cache_alloc(jfs_inode_cachep, GFP_NOFS);
108 	if (!jfs_inode)
109 		return NULL;
110 	return &jfs_inode->vfs_inode;
111 }
112 
113 static void jfs_destroy_inode(struct inode *inode)
114 {
115 	struct jfs_inode_info *ji = JFS_IP(inode);
116 
117 	spin_lock_irq(&ji->ag_lock);
118 	if (ji->active_ag != -1) {
119 		struct bmap *bmap = JFS_SBI(inode->i_sb)->bmap;
120 		atomic_dec(&bmap->db_active[ji->active_ag]);
121 		ji->active_ag = -1;
122 	}
123 	spin_unlock_irq(&ji->ag_lock);
124 
125 #ifdef CONFIG_JFS_POSIX_ACL
126 	if (ji->i_acl != JFS_ACL_NOT_CACHED) {
127 		posix_acl_release(ji->i_acl);
128 		ji->i_acl = JFS_ACL_NOT_CACHED;
129 	}
130 	if (ji->i_default_acl != JFS_ACL_NOT_CACHED) {
131 		posix_acl_release(ji->i_default_acl);
132 		ji->i_default_acl = JFS_ACL_NOT_CACHED;
133 	}
134 #endif
135 
136 	kmem_cache_free(jfs_inode_cachep, ji);
137 }
138 
139 static int jfs_statfs(struct super_block *sb, struct kstatfs *buf)
140 {
141 	struct jfs_sb_info *sbi = JFS_SBI(sb);
142 	s64 maxinodes;
143 	struct inomap *imap = JFS_IP(sbi->ipimap)->i_imap;
144 
145 	jfs_info("In jfs_statfs");
146 	buf->f_type = JFS_SUPER_MAGIC;
147 	buf->f_bsize = sbi->bsize;
148 	buf->f_blocks = sbi->bmap->db_mapsize;
149 	buf->f_bfree = sbi->bmap->db_nfree;
150 	buf->f_bavail = sbi->bmap->db_nfree;
151 	/*
152 	 * If we really return the number of allocated & free inodes, some
153 	 * applications will fail because they won't see enough free inodes.
154 	 * We'll try to calculate some guess as to how many inodes we can
155 	 * really allocate
156 	 *
157 	 * buf->f_files = atomic_read(&imap->im_numinos);
158 	 * buf->f_ffree = atomic_read(&imap->im_numfree);
159 	 */
160 	maxinodes = min((s64) atomic_read(&imap->im_numinos) +
161 			((sbi->bmap->db_nfree >> imap->im_l2nbperiext)
162 			 << L2INOSPEREXT), (s64) 0xffffffffLL);
163 	buf->f_files = maxinodes;
164 	buf->f_ffree = maxinodes - (atomic_read(&imap->im_numinos) -
165 				    atomic_read(&imap->im_numfree));
166 
167 	buf->f_namelen = JFS_NAME_MAX;
168 	return 0;
169 }
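
To make the f_files estimate concrete: JFS carves inodes out of fixed-size extents, so every group of free blocks can still yield new inodes. A worked example in comment form — every number below is hypothetical, chosen only to show the shape of the calculation:

	/*
	 * Suppose (hypothetically):
	 *   im_numinos     = 1000   inode slots allocated so far
	 *   db_nfree       = 4096   free filesystem blocks
	 *   im_l2nbperiext = 2      -> 4 blocks per inode extent
	 *   L2INOSPEREXT   = 5      -> 32 inodes per extent
	 *
	 * maxinodes = min(1000 + ((4096 >> 2) << 5), 0xffffffff)
	 *           = min(1000 + 1024 * 32, 0xffffffff) = 33768
	 *
	 * f_ffree then subtracts the inodes already in use
	 * (im_numinos - im_numfree) from that ceiling.
	 */
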
170 
171 static void jfs_put_super(struct super_block *sb)
172 {
173 	struct jfs_sb_info *sbi = JFS_SBI(sb);
174 	int rc;
175 
176 	jfs_info("In jfs_put_super");
177 	rc = jfs_umount(sb);
178 	if (rc)
179 		jfs_err("jfs_umount failed with return code %d", rc);
180 	if (sbi->nls_tab)
181 		unload_nls(sbi->nls_tab);
182 	sbi->nls_tab = NULL;
183 
184 	truncate_inode_pages(sbi->direct_inode->i_mapping, 0);
185 	iput(sbi->direct_inode);
186 	sbi->direct_inode = NULL;
187 
188 	kfree(sbi);
189 }
190 
191 enum {
192 	Opt_integrity, Opt_nointegrity, Opt_iocharset, Opt_resize,
193 	Opt_resize_nosize, Opt_errors, Opt_ignore, Opt_err,
194 };
195 
196 static match_table_t tokens = {
197 	{Opt_integrity, "integrity"},
198 	{Opt_nointegrity, "nointegrity"},
199 	{Opt_iocharset, "iocharset=%s"},
200 	{Opt_resize, "resize=%u"},
201 	{Opt_resize_nosize, "resize"},
202 	{Opt_errors, "errors=%s"},
203 	{Opt_ignore, "noquota"},
204 	{Opt_ignore, "quota"},
205 	{Opt_ignore, "usrquota"},
206 	{Opt_ignore, "grpquota"},
207 	{Opt_err, NULL}
208 };
209 
210 static int parse_options(char *options, struct super_block *sb, s64 *newLVSize,
211 			 int *flag)
212 {
213 	void *nls_map = (void *) -1;	/* -1: no change; NULL: none */
214 	char *p;
215 	struct jfs_sb_info *sbi = JFS_SBI(sb);
216 
217 	*newLVSize = 0;
218 
219 	if (!options)
220 		return 1;
221 
222 	while ((p = strsep(&options, ",")) != NULL) {
223 		substring_t args[MAX_OPT_ARGS];
224 		int token;
225 		if (!*p)
226 			continue;
227 
228 		token = match_token(p, tokens, args);
229 		switch (token) {
230 		case Opt_integrity:
231 			*flag &= ~JFS_NOINTEGRITY;
232 			break;
233 		case Opt_nointegrity:
234 			*flag |= JFS_NOINTEGRITY;
235 			break;
236 		case Opt_ignore:
237 			/* Silently ignore the quota options */
238 			/* Don't do anything ;-) */
239 			break;
240 		case Opt_iocharset:
241 			if (nls_map && nls_map != (void *) -1)
242 				unload_nls(nls_map);
243 			if (!strcmp(args[0].from, "none"))
244 				nls_map = NULL;
245 			else {
246 				nls_map = load_nls(args[0].from);
247 				if (!nls_map) {
248 					printk(KERN_ERR
249 					       "JFS: charset not found\n");
250 					goto cleanup;
251 				}
252 			}
253 			break;
254 		case Opt_resize:
255 		{
256 			char *resize = args[0].from;
257 			*newLVSize = simple_strtoull(resize, &resize, 0);
258 			break;
259 		}
260 		case Opt_resize_nosize:
261 		{
262 			*newLVSize = sb->s_bdev->bd_inode->i_size >>
263 				sb->s_blocksize_bits;
264 			if (*newLVSize == 0)
265 				printk(KERN_ERR
266 				       "JFS: Cannot determine volume size\n");
267 			break;
268 		}
269 		case Opt_errors:
270 		{
271 			char *errors = args[0].from;
272 			if (!errors || !*errors)
273 				goto cleanup;
274 			if (!strcmp(errors, "continue")) {
275 				*flag &= ~JFS_ERR_REMOUNT_RO;
276 				*flag &= ~JFS_ERR_PANIC;
277 				*flag |= JFS_ERR_CONTINUE;
278 			} else if (!strcmp(errors, "remount-ro")) {
279 				*flag &= ~JFS_ERR_CONTINUE;
280 				*flag &= ~JFS_ERR_PANIC;
281 				*flag |= JFS_ERR_REMOUNT_RO;
282 			} else if (!strcmp(errors, "panic")) {
283 				*flag &= ~JFS_ERR_CONTINUE;
284 				*flag &= ~JFS_ERR_REMOUNT_RO;
285 				*flag |= JFS_ERR_PANIC;
286 			} else {
287 				printk(KERN_ERR
288 				       "JFS: %s is an invalid error handler\n",
289 				       errors);
290 				goto cleanup;
291 			}
292 			break;
293 		}
294 		default:
295 			printk("jfs: Unrecognized mount option \"%s\" "
296 			       "or missing value\n", p);
297 			goto cleanup;
298 		}
299 	}
300 
301 	if (nls_map != (void *) -1) {
302 		/* Discard old (if remount) */
303 		if (sbi->nls_tab)
304 			unload_nls(sbi->nls_tab);
305 		sbi->nls_tab = nls_map;
306 	}
307 	return 1;
308 
309 cleanup:
310 	if (nls_map && nls_map != (void *) -1)
311 		unload_nls(nls_map);
312 	return 0;
313 }
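
A concrete walk-through of the loop above, for a hypothetical mount command:

	/*
	 * mount -t jfs -o nointegrity,iocharset=utf8,errors=panic /dev/hda5 /mnt
	 *
	 * strsep() splits the option string at each ',':
	 *   "nointegrity"    -> Opt_nointegrity -> *flag |= JFS_NOINTEGRITY
	 *   "iocharset=utf8" -> Opt_iocharset   -> nls_map = load_nls("utf8")
	 *   "errors=panic"   -> Opt_errors      -> *flag |= JFS_ERR_PANIC
	 *
	 * An unrecognized token hits the default case, which unloads any
	 * partially-loaded nls table at the cleanup label and fails the mount.
	 */
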
314 
315 static int jfs_remount(struct super_block *sb, int *flags, char *data)
316 {
317 	s64 newLVSize = 0;
318 	int rc = 0;
319 	int flag = JFS_SBI(sb)->flag;
320 
321 	if (!parse_options(data, sb, &newLVSize, &flag)) {
322 		return -EINVAL;
323 	}
324 	if (newLVSize) {
325 		if (sb->s_flags & MS_RDONLY) {
326 			printk(KERN_ERR
327 			       "JFS: resize requires volume to be mounted read-write\n");
328 			return -EROFS;
329 		}
330 		rc = jfs_extendfs(sb, newLVSize, 0);
331 		if (rc)
332 			return rc;
333 	}
334 
335 	if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) {
336 		/*
337 		 * Invalidate any previously read metadata.  fsck may have
338 		 * changed the on-disk data since we mounted r/o
339 		 */
340 		truncate_inode_pages(JFS_SBI(sb)->direct_inode->i_mapping, 0);
341 
342 		JFS_SBI(sb)->flag = flag;
343 		return jfs_mount_rw(sb, 1);
344 	}
345 	if ((!(sb->s_flags & MS_RDONLY)) && (*flags & MS_RDONLY)) {
346 		rc = jfs_umount_rw(sb);
347 		JFS_SBI(sb)->flag = flag;
348 		return rc;
349 	}
350 	if ((JFS_SBI(sb)->flag & JFS_NOINTEGRITY) != (flag & JFS_NOINTEGRITY))
351 		if (!(sb->s_flags & MS_RDONLY)) {
352 			rc = jfs_umount_rw(sb);
353 			if (rc)
354 				return rc;
355 			JFS_SBI(sb)->flag = flag;
356 			return jfs_mount_rw(sb, 1);
357 		}
358 	JFS_SBI(sb)->flag = flag;
359 
360 	return 0;
361 }
362 
363 static int jfs_fill_super(struct super_block *sb, void *data, int silent)
364 {
365 	struct jfs_sb_info *sbi;
366 	struct inode *inode;
367 	int rc;
368 	s64 newLVSize = 0;
369 	int flag;
370 
371 	jfs_info("In jfs_read_super: s_flags=0x%lx", sb->s_flags);
372 
373 	if (!new_valid_dev(sb->s_bdev->bd_dev))
374 		return -EOVERFLOW;
375 
376 	sbi = kmalloc(sizeof (struct jfs_sb_info), GFP_KERNEL);
377 	if (!sbi)
378 		return -ENOSPC;
379 	memset(sbi, 0, sizeof (struct jfs_sb_info));
380 	sb->s_fs_info = sbi;
381 	sbi->sb = sb;
382 
383 	/* initialize the mount flag and determine the default error handler */
384 	flag = JFS_ERR_REMOUNT_RO;
385 
386 	if (!parse_options((char *) data, sb, &newLVSize, &flag)) {
387 		kfree(sbi);
388 		return -EINVAL;
389 	}
390 	sbi->flag = flag;
391 
392 #ifdef CONFIG_JFS_POSIX_ACL
393 	sb->s_flags |= MS_POSIXACL;
394 #endif
395 
396 	if (newLVSize) {
397 		printk(KERN_ERR "resize option for remount only\n");
398 		return -EINVAL;
399 	}
400 
401 	/*
402 	 * Initialize blocksize to 4K.
403 	 */
404 	sb_set_blocksize(sb, PSIZE);
405 
406 	/*
407 	 * Set method vectors.
408 	 */
409 	sb->s_op = &jfs_super_operations;
410 	sb->s_export_op = &jfs_export_operations;
411 
412 	/*
413 	 * Initialize direct-mapping inode/address-space
414 	 */
415 	inode = new_inode(sb);
416 	if (inode == NULL)
417 		goto out_kfree;
418 	inode->i_ino = 0;
419 	inode->i_nlink = 1;
420 	inode->i_size = sb->s_bdev->bd_inode->i_size;
421 	inode->i_mapping->a_ops = &jfs_metapage_aops;
422 	mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
423 
424 	sbi->direct_inode = inode;
425 
426 	rc = jfs_mount(sb);
427 	if (rc) {
428 		if (!silent) {
429 			jfs_err("jfs_mount failed w/return code = %d", rc);
430 		}
431 		goto out_mount_failed;
432 	}
433 	if (sb->s_flags & MS_RDONLY)
434 		sbi->log = NULL;
435 	else {
436 		rc = jfs_mount_rw(sb, 0);
437 		if (rc) {
438 			if (!silent) {
439 				jfs_err("jfs_mount_rw failed, return code = %d",
440 					rc);
441 			}
442 			goto out_no_rw;
443 		}
444 	}
445 
446 	sb->s_magic = JFS_SUPER_MAGIC;
447 
448 	inode = iget(sb, ROOT_I);
449 	if (!inode || is_bad_inode(inode))
450 		goto out_no_root;
451 	sb->s_root = d_alloc_root(inode);
452 	if (!sb->s_root)
453 		goto out_no_root;
454 
455 	if (sbi->mntflag & JFS_OS2)
456 		sb->s_root->d_op = &jfs_ci_dentry_operations;
457 
458 	/* logical blocks are represented by 40 bits in pxd_t, etc. */
459 	sb->s_maxbytes = ((u64) sb->s_blocksize) << 40;
460 #if BITS_PER_LONG == 32
461 	/*
462 	 * Page cache is indexed by long.
463 	 * I would use MAX_LFS_FILESIZE, but it's only half as big
464 	 */
465 	sb->s_maxbytes = min(((u64) PAGE_CACHE_SIZE << 32) - 1, sb->s_maxbytes);
466 #endif
467 	sb->s_time_gran = 1;
468 	return 0;
469 
470 out_no_root:
471 	jfs_err("jfs_read_super: get root inode failed");
472 	if (inode)
473 		iput(inode);
474 
475 out_no_rw:
476 	rc = jfs_umount(sb);
477 	if (rc) {
478 		jfs_err("jfs_umount failed with return code %d", rc);
479 	}
480 out_mount_failed:
481 	filemap_fdatawrite(sbi->direct_inode->i_mapping);
482 	filemap_fdatawait(sbi->direct_inode->i_mapping);
483 	truncate_inode_pages(sbi->direct_inode->i_mapping, 0);
484 	make_bad_inode(sbi->direct_inode);
485 	iput(sbi->direct_inode);
486 	sbi->direct_inode = NULL;
487 out_kfree:
488 	if (sbi->nls_tab)
489 		unload_nls(sbi->nls_tab);
490 	kfree(sbi);
491 	return -EINVAL;
492 }
493 
494 static void jfs_write_super_lockfs(struct super_block *sb)
495 {
496 	struct jfs_sb_info *sbi = JFS_SBI(sb);
497 	struct jfs_log *log = sbi->log;
498 
499 	if (!(sb->s_flags & MS_RDONLY)) {
500 		txQuiesce(sb);
501 		lmLogShutdown(log);
502 		updateSuper(sb, FM_CLEAN);
503 	}
504 }
505 
506 static void jfs_unlockfs(struct super_block *sb)
507 {
508 	struct jfs_sb_info *sbi = JFS_SBI(sb);
509 	struct jfs_log *log = sbi->log;
510 	int rc = 0;
511 
512 	if (!(sb->s_flags & MS_RDONLY)) {
513 		updateSuper(sb, FM_MOUNT);
514 		if ((rc = lmLogInit(log)))
515 			jfs_err("jfs_unlock failed with return code %d", rc);
516 		else
517 			txResume(sb);
518 	}
519 }
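
These two hooks are the freeze/thaw pair the VFS calls around block-device snapshots (LVM and the like). The ordering is the point: stop transactions before tearing the log down, and mark the superblock dirty again before letting writes resume. In comment form (my reading of the two functions above, not authoritative):

	/*
	 * freeze (jfs_write_super_lockfs):
	 *   txQuiesce(sb)              block new tx, drain outstanding ones
	 *   lmLogShutdown(log)         force the log to disk and close it
	 *   updateSuper(sb, FM_CLEAN)  on-disk image is now self-consistent
	 *
	 * thaw (jfs_unlockfs) reverses it:
	 *   updateSuper(sb, FM_MOUNT)  re-mark dirty *before* writes resume
	 *   lmLogInit(log)             reopen the log
	 *   txResume(sb)               let transactions through again
	 */
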
520 
521 static struct super_block *jfs_get_sb(struct file_system_type *fs_type,
522 	int flags, const char *dev_name, void *data)
523 {
524 	return get_sb_bdev(fs_type, flags, dev_name, data, jfs_fill_super);
525 }
526 
527 static int jfs_sync_fs(struct super_block *sb, int wait)
528 {
529 	struct jfs_log *log = JFS_SBI(sb)->log;
530 
531 	/* log == NULL indicates read-only mount */
532 	if (log) {
533 		jfs_flush_journal(log, wait);
534 -		jfs_syncpt(log);
534 +		jfs_syncpt(log, 0);
535 	}
536 
537 	return 0;
538 }
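
That one-liner is super.c's entire share of the patch: jfs_syncpt() grows a second argument. Judging from the commit description, the flag distinguishes an ordinary sync point from a "hard" one taken during sync barrier processing — the window in which every outstanding transaction has been committed to disk and no new one may start, so hot metadata pages are finally unlocked and writable. jfs_sync_fs() passes 0 for the ordinary behavior; presumably the barrier path in jfs_txnmgr.c passes a nonzero value. A sketch of the presumed prototype (the parameter name is a guess):

	/* presumed new prototype, declared in jfs_logmgr.h */
	void jfs_syncpt(struct jfs_log *log, int hard_sync);

	/* ordinary sync point, as called above */
	jfs_syncpt(log, 0);

	/* hard sync point during barrier processing (presumed caller) */
	jfs_syncpt(log, 1);
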
539 
540 static struct super_operations jfs_super_operations = {
541 	.alloc_inode	= jfs_alloc_inode,
542 	.destroy_inode	= jfs_destroy_inode,
543 	.read_inode	= jfs_read_inode,
544 	.dirty_inode	= jfs_dirty_inode,
545 	.write_inode	= jfs_write_inode,
546 	.delete_inode	= jfs_delete_inode,
547 	.put_super	= jfs_put_super,
548 	.sync_fs	= jfs_sync_fs,
549 	.write_super_lockfs = jfs_write_super_lockfs,
550 	.unlockfs	= jfs_unlockfs,
551 	.statfs		= jfs_statfs,
552 	.remount_fs	= jfs_remount,
553 };
554 
555 static struct export_operations jfs_export_operations = {
556 	.get_parent	= jfs_get_parent,
557 };
558 
559 static struct file_system_type jfs_fs_type = {
560 	.owner		= THIS_MODULE,
561 	.name		= "jfs",
562 	.get_sb		= jfs_get_sb,
563 	.kill_sb	= kill_block_super,
564 	.fs_flags	= FS_REQUIRES_DEV,
565 };
566 
567 static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
568 {
569 	struct jfs_inode_info *jfs_ip = (struct jfs_inode_info *) foo;
570 
571 	if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) ==
572 	    SLAB_CTOR_CONSTRUCTOR) {
573 		memset(jfs_ip, 0, sizeof(struct jfs_inode_info));
574 		INIT_LIST_HEAD(&jfs_ip->anon_inode_list);
575 		init_rwsem(&jfs_ip->rdwrlock);
576 		init_MUTEX(&jfs_ip->commit_sem);
577 		init_rwsem(&jfs_ip->xattr_sem);
578 		spin_lock_init(&jfs_ip->ag_lock);
579 		jfs_ip->active_ag = -1;
580 #ifdef CONFIG_JFS_POSIX_ACL
581 		jfs_ip->i_acl = JFS_ACL_NOT_CACHED;
582 		jfs_ip->i_default_acl = JFS_ACL_NOT_CACHED;
583 #endif
584 		inode_init_once(&jfs_ip->vfs_inode);
585 	}
586 }
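
The SLAB_CTOR_CONSTRUCTOR check matters because the constructor runs once per slab object, not once per allocation: only state that must survive free/alloc cycles (locks, list heads, the embedded VFS inode) belongs here. A minimal sketch of the same pattern with hypothetical names, using the same 2.6-era ctor signature:

	struct foo {				/* hypothetical cached object */
		spinlock_t lock;
		struct list_head list;
	};

	static kmem_cache_t *foo_cachep;

	static void foo_init_once(void *obj, kmem_cache_t *cachep,
				  unsigned long flags)
	{
		struct foo *f = obj;

		if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) ==
		    SLAB_CTOR_CONSTRUCTOR) {
			spin_lock_init(&f->lock);	/* survives free/alloc */
			INIT_LIST_HEAD(&f->list);
		}
	}

	static int __init foo_setup(void)
	{
		foo_cachep = kmem_cache_create("foo", sizeof(struct foo), 0,
					       SLAB_RECLAIM_ACCOUNT,
					       foo_init_once, NULL);
		return foo_cachep ? 0 : -ENOMEM;
	}
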
587 
588 static int __init init_jfs_fs(void)
589 {
590 	int i;
591 	int rc;
592 
593 	jfs_inode_cachep =
594 	    kmem_cache_create("jfs_ip", sizeof(struct jfs_inode_info), 0,
595 			      SLAB_RECLAIM_ACCOUNT, init_once, NULL);
596 	if (jfs_inode_cachep == NULL)
597 		return -ENOMEM;
598 
599 	/*
600 	 * Metapage initialization
601 	 */
602 	rc = metapage_init();
603 	if (rc) {
604 		jfs_err("metapage_init failed w/rc = %d", rc);
605 		goto free_slab;
606 	}
607 
608 	/*
609 	 * Transaction Manager initialization
610 	 */
611 	rc = txInit();
612 	if (rc) {
613 		jfs_err("txInit failed w/rc = %d", rc);
614 		goto free_metapage;
615 	}
616 
617 	/*
618 	 * I/O completion thread (endio)
619 	 */
620 	jfsIOthread = kernel_thread(jfsIOWait, NULL, CLONE_KERNEL);
621 	if (jfsIOthread < 0) {
622 		jfs_err("init_jfs_fs: fork failed w/rc = %d", jfsIOthread);
623 		goto end_txmngr;
624 	}
625 	wait_for_completion(&jfsIOwait);	/* Wait until thread starts */
626 
627 	if (commit_threads < 1)
628 		commit_threads = num_online_cpus();
629 	if (commit_threads > MAX_COMMIT_THREADS)
630 		commit_threads = MAX_COMMIT_THREADS;
631 
632 	for (i = 0; i < commit_threads; i++) {
633 		jfsCommitThread[i] = kernel_thread(jfs_lazycommit, NULL,
634 						   CLONE_KERNEL);
635 		if (jfsCommitThread[i] < 0) {
636 			jfs_err("init_jfs_fs: fork failed w/rc = %d",
637 				jfsCommitThread[i]);
638 			commit_threads = i;
639 			goto kill_committask;
640 		}
641 		/* Wait until thread starts */
642 		wait_for_completion(&jfsIOwait);
643 	}
644 
645 	jfsSyncThread = kernel_thread(jfs_sync, NULL, CLONE_KERNEL);
646 	if (jfsSyncThread < 0) {
647 		jfs_err("init_jfs_fs: fork failed w/rc = %d", jfsSyncThread);
648 		goto kill_committask;
649 	}
650 	wait_for_completion(&jfsIOwait);	/* Wait until thread starts */
651 
652 #ifdef PROC_FS_JFS
653 	jfs_proc_init();
654 #endif
655 
656 	return register_filesystem(&jfs_fs_type);
657 
658 kill_committask:
659 	jfs_stop_threads = 1;
660 	wake_up_all(&jfs_commit_thread_wait);
661 	for (i = 0; i < commit_threads; i++)
662 		wait_for_completion(&jfsIOwait);
663 
664 	wake_up(&jfs_IO_thread_wait);
665 	wait_for_completion(&jfsIOwait);	/* Wait for thread exit */
666 end_txmngr:
667 	txExit();
668 free_metapage:
669 	metapage_exit();
670 free_slab:
671 	kmem_cache_destroy(jfs_inode_cachep);
672 	return rc;
673 }
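
init_jfs_fs() is a textbook kernel unwind ladder: each setup step that fails jumps to a label that releases everything acquired so far, in reverse order, so there is exactly one cleanup path. The skeleton, with hypothetical resource names:

	static int __init init_example(void)
	{
		int rc;

		rc = setup_a();		/* cf. the inode slab above */
		if (rc)
			return rc;
		rc = setup_b();		/* cf. metapage_init() */
		if (rc)
			goto undo_a;
		rc = setup_c();		/* cf. txInit() */
		if (rc)
			goto undo_b;

		return 0;		/* cf. register_filesystem() */

	undo_b:
		teardown_b();
	undo_a:
		teardown_a();
		return rc;
	}
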
674 
675 static void __exit exit_jfs_fs(void)
676 {
677 	int i;
678 
679 	jfs_info("exit_jfs_fs called");
680 
681 	jfs_stop_threads = 1;
682 	txExit();
683 	metapage_exit();
684 	wake_up(&jfs_IO_thread_wait);
685 	wait_for_completion(&jfsIOwait);	/* Wait until IO thread exits */
686 	wake_up_all(&jfs_commit_thread_wait);
687 	for (i = 0; i < commit_threads; i++)
688 		wait_for_completion(&jfsIOwait);
689 	wake_up(&jfs_sync_thread_wait);
690 	wait_for_completion(&jfsIOwait);	/* Wait until Sync thread exits */
691 #ifdef PROC_FS_JFS
692 	jfs_proc_clean();
693 #endif
694 	unregister_filesystem(&jfs_fs_type);
695 	kmem_cache_destroy(jfs_inode_cachep);
696 }
697 
698 module_init(init_jfs_fs)
699 module_exit(exit_jfs_fs)
700 