Commit cbc3d65ebcb0c494183d45cf202a53352cbf3871
1 parent: de8fd087b2
Exists in master and in 7 other branches
JFS: Improve sync barrier processing
Under heavy load, hot metadata pages are often locked by uncommitted transactions, making them difficult to flush to disk. This prevents the sync point from advancing past a transaction that modified the page. There is a point during sync barrier processing at which all outstanding transactions have been committed to disk, but no new transactions have been allowed to proceed. This is the best time to write the metadata.

Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
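The window the patch exploits can be sketched roughly as follows. This is a minimal, self-contained illustration only: the type and helper names below (sketch_log, write_dirty_metapages, and so on) are hypothetical stand-ins, not JFS code; the actual change is in lmLogSync() in the diff below.

    /*
     * Sketch of the sync barrier window described above.
     * All names here are hypothetical placeholders.
     */
    struct sketch_log {
            int active_tx;       /* transactions not yet committed to disk */
            int barrier_raised;  /* no new transactions may start */
    };

    static void write_dirty_metapages(struct sketch_log *log)
    {
            /* placeholder: flush hot metadata pages; nothing can hold
             * them locked while the barrier is raised and active_tx == 0 */
            (void) log;
    }

    static void sync_barrier_window(struct sketch_log *log)
    {
            log->barrier_raised = 1;        /* stop new transactions */
            while (log->active_tx > 0)
                    ;       /* in reality: sleep until outstanding commits
                             * have reached disk */

            /* all committed, none pending: best time to write metadata,
             * so the sync point can advance */
            write_dirty_metapages(log);

            log->barrier_raised = 0;        /* let new transactions proceed */
    }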
Showing 4 changed files with 26 additions and 24 deletions
fs/jfs/jfs_logmgr.c
1 | /* | 1 | /* |
2 | * Copyright (C) International Business Machines Corp., 2000-2004 | 2 | * Copyright (C) International Business Machines Corp., 2000-2004 |
3 | * Portions Copyright (C) Christoph Hellwig, 2001-2002 | 3 | * Portions Copyright (C) Christoph Hellwig, 2001-2002 |
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by | 6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or | 7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. | 8 | * (at your option) any later version. |
9 | * | 9 | * |
10 | * This program is distributed in the hope that it will be useful, | 10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
13 | * the GNU General Public License for more details. | 13 | * the GNU General Public License for more details. |
14 | * | 14 | * |
15 | * You should have received a copy of the GNU General Public License | 15 | * You should have received a copy of the GNU General Public License |
16 | * along with this program; if not, write to the Free Software | 16 | * along with this program; if not, write to the Free Software |
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
18 | */ | 18 | */ |
19 | 19 | ||
20 | /* | 20 | /* |
21 | * jfs_logmgr.c: log manager | 21 | * jfs_logmgr.c: log manager |
22 | * | 22 | * |
23 | * for related information, see transaction manager (jfs_txnmgr.c), and | 23 | * for related information, see transaction manager (jfs_txnmgr.c), and |
24 | * recovery manager (jfs_logredo.c). | 24 | * recovery manager (jfs_logredo.c). |
25 | * | 25 | * |
26 | * note: for detail, RTFS. | 26 | * note: for detail, RTFS. |
27 | * | 27 | * |
28 | * log buffer manager: | 28 | * log buffer manager: |
29 | * special purpose buffer manager supporting log i/o requirements. | 29 | * special purpose buffer manager supporting log i/o requirements. |
30 | * per log serial pageout of logpage | 30 | * per log serial pageout of logpage |
31 | * queuing i/o requests and redrive i/o at iodone | 31 | * queuing i/o requests and redrive i/o at iodone |
32 | * maintain current logpage buffer | 32 | * maintain current logpage buffer |
33 | * no caching since append only | 33 | * no caching since append only |
34 | * appropriate jfs buffer cache buffers as needed | 34 | * appropriate jfs buffer cache buffers as needed |
35 | * | 35 | * |
36 | * group commit: | 36 | * group commit: |
37 | * transactions which wrote COMMIT records in the same in-memory | 37 | * transactions which wrote COMMIT records in the same in-memory |
38 | * log page during the pageout of previous/current log page(s) are | 38 | * log page during the pageout of previous/current log page(s) are |
39 | * committed together by the pageout of the page. | 39 | * committed together by the pageout of the page. |
40 | * | 40 | * |
41 | * TBD lazy commit: | 41 | * TBD lazy commit: |
42 | * transactions are committed asynchronously when the log page | 42 | * transactions are committed asynchronously when the log page |
43 | * containing it COMMIT is paged out when it becomes full; | 43 | * containing it COMMIT is paged out when it becomes full; |
44 | * | 44 | * |
45 | * serialization: | 45 | * serialization: |
46 | * . a per log lock serialize log write. | 46 | * . a per log lock serialize log write. |
47 | * . a per log lock serialize group commit. | 47 | * . a per log lock serialize group commit. |
48 | * . a per log lock serialize log open/close; | 48 | * . a per log lock serialize log open/close; |
49 | * | 49 | * |
50 | * TBD log integrity: | 50 | * TBD log integrity: |
51 | * careful-write (ping-pong) of last logpage to recover from crash | 51 | * careful-write (ping-pong) of last logpage to recover from crash |
52 | * in overwrite. | 52 | * in overwrite. |
53 | * detection of split (out-of-order) write of physical sectors | 53 | * detection of split (out-of-order) write of physical sectors |
54 | * of last logpage via timestamp at end of each sector | 54 | * of last logpage via timestamp at end of each sector |
55 | * with its mirror data array at trailer). | 55 | * with its mirror data array at trailer). |
56 | * | 56 | * |
57 | * alternatives: | 57 | * alternatives: |
58 | * lsn - 64-bit monotonically increasing integer vs | 58 | * lsn - 64-bit monotonically increasing integer vs |
59 | * 32-bit lspn and page eor. | 59 | * 32-bit lspn and page eor. |
60 | */ | 60 | */ |
61 | 61 | ||
62 | #include <linux/fs.h> | 62 | #include <linux/fs.h> |
63 | #include <linux/blkdev.h> | 63 | #include <linux/blkdev.h> |
64 | #include <linux/interrupt.h> | 64 | #include <linux/interrupt.h> |
65 | #include <linux/smp_lock.h> | 65 | #include <linux/smp_lock.h> |
66 | #include <linux/completion.h> | 66 | #include <linux/completion.h> |
67 | #include <linux/buffer_head.h> /* for sync_blockdev() */ | 67 | #include <linux/buffer_head.h> /* for sync_blockdev() */ |
68 | #include <linux/bio.h> | 68 | #include <linux/bio.h> |
69 | #include <linux/suspend.h> | 69 | #include <linux/suspend.h> |
70 | #include <linux/delay.h> | 70 | #include <linux/delay.h> |
71 | #include "jfs_incore.h" | 71 | #include "jfs_incore.h" |
72 | #include "jfs_filsys.h" | 72 | #include "jfs_filsys.h" |
73 | #include "jfs_metapage.h" | 73 | #include "jfs_metapage.h" |
74 | #include "jfs_superblock.h" | 74 | #include "jfs_superblock.h" |
75 | #include "jfs_txnmgr.h" | 75 | #include "jfs_txnmgr.h" |
76 | #include "jfs_debug.h" | 76 | #include "jfs_debug.h" |
77 | 77 | ||
78 | 78 | ||
79 | /* | 79 | /* |
80 | * lbuf's ready to be redriven. Protected by log_redrive_lock (jfsIO thread) | 80 | * lbuf's ready to be redriven. Protected by log_redrive_lock (jfsIO thread) |
81 | */ | 81 | */ |
82 | static struct lbuf *log_redrive_list; | 82 | static struct lbuf *log_redrive_list; |
83 | static DEFINE_SPINLOCK(log_redrive_lock); | 83 | static DEFINE_SPINLOCK(log_redrive_lock); |
84 | DECLARE_WAIT_QUEUE_HEAD(jfs_IO_thread_wait); | 84 | DECLARE_WAIT_QUEUE_HEAD(jfs_IO_thread_wait); |
85 | 85 | ||
86 | 86 | ||
87 | /* | 87 | /* |
88 | * log read/write serialization (per log) | 88 | * log read/write serialization (per log) |
89 | */ | 89 | */ |
90 | #define LOG_LOCK_INIT(log) init_MUTEX(&(log)->loglock) | 90 | #define LOG_LOCK_INIT(log) init_MUTEX(&(log)->loglock) |
91 | #define LOG_LOCK(log) down(&((log)->loglock)) | 91 | #define LOG_LOCK(log) down(&((log)->loglock)) |
92 | #define LOG_UNLOCK(log) up(&((log)->loglock)) | 92 | #define LOG_UNLOCK(log) up(&((log)->loglock)) |
93 | 93 | ||
94 | 94 | ||
95 | /* | 95 | /* |
96 | * log group commit serialization (per log) | 96 | * log group commit serialization (per log) |
97 | */ | 97 | */ |
98 | 98 | ||
99 | #define LOGGC_LOCK_INIT(log) spin_lock_init(&(log)->gclock) | 99 | #define LOGGC_LOCK_INIT(log) spin_lock_init(&(log)->gclock) |
100 | #define LOGGC_LOCK(log) spin_lock_irq(&(log)->gclock) | 100 | #define LOGGC_LOCK(log) spin_lock_irq(&(log)->gclock) |
101 | #define LOGGC_UNLOCK(log) spin_unlock_irq(&(log)->gclock) | 101 | #define LOGGC_UNLOCK(log) spin_unlock_irq(&(log)->gclock) |
102 | #define LOGGC_WAKEUP(tblk) wake_up_all(&(tblk)->gcwait) | 102 | #define LOGGC_WAKEUP(tblk) wake_up_all(&(tblk)->gcwait) |
103 | 103 | ||
104 | /* | 104 | /* |
105 | * log sync serialization (per log) | 105 | * log sync serialization (per log) |
106 | */ | 106 | */ |
107 | #define LOGSYNC_DELTA(logsize) min((logsize)/8, 128*LOGPSIZE) | 107 | #define LOGSYNC_DELTA(logsize) min((logsize)/8, 128*LOGPSIZE) |
108 | #define LOGSYNC_BARRIER(logsize) ((logsize)/4) | 108 | #define LOGSYNC_BARRIER(logsize) ((logsize)/4) |
109 | /* | 109 | /* |
110 | #define LOGSYNC_DELTA(logsize) min((logsize)/4, 256*LOGPSIZE) | 110 | #define LOGSYNC_DELTA(logsize) min((logsize)/4, 256*LOGPSIZE) |
111 | #define LOGSYNC_BARRIER(logsize) ((logsize)/2) | 111 | #define LOGSYNC_BARRIER(logsize) ((logsize)/2) |
112 | */ | 112 | */ |
113 | 113 | ||
114 | 114 | ||
115 | /* | 115 | /* |
116 | * log buffer cache synchronization | 116 | * log buffer cache synchronization |
117 | */ | 117 | */ |
118 | static DEFINE_SPINLOCK(jfsLCacheLock); | 118 | static DEFINE_SPINLOCK(jfsLCacheLock); |
119 | 119 | ||
120 | #define LCACHE_LOCK(flags) spin_lock_irqsave(&jfsLCacheLock, flags) | 120 | #define LCACHE_LOCK(flags) spin_lock_irqsave(&jfsLCacheLock, flags) |
121 | #define LCACHE_UNLOCK(flags) spin_unlock_irqrestore(&jfsLCacheLock, flags) | 121 | #define LCACHE_UNLOCK(flags) spin_unlock_irqrestore(&jfsLCacheLock, flags) |
122 | 122 | ||
123 | /* | 123 | /* |
124 | * See __SLEEP_COND in jfs_locks.h | 124 | * See __SLEEP_COND in jfs_locks.h |
125 | */ | 125 | */ |
126 | #define LCACHE_SLEEP_COND(wq, cond, flags) \ | 126 | #define LCACHE_SLEEP_COND(wq, cond, flags) \ |
127 | do { \ | 127 | do { \ |
128 | if (cond) \ | 128 | if (cond) \ |
129 | break; \ | 129 | break; \ |
130 | __SLEEP_COND(wq, cond, LCACHE_LOCK(flags), LCACHE_UNLOCK(flags)); \ | 130 | __SLEEP_COND(wq, cond, LCACHE_LOCK(flags), LCACHE_UNLOCK(flags)); \ |
131 | } while (0) | 131 | } while (0) |
132 | 132 | ||
133 | #define LCACHE_WAKEUP(event) wake_up(event) | 133 | #define LCACHE_WAKEUP(event) wake_up(event) |
134 | 134 | ||
135 | 135 | ||
136 | /* | 136 | /* |
137 | * lbuf buffer cache (lCache) control | 137 | * lbuf buffer cache (lCache) control |
138 | */ | 138 | */ |
139 | /* log buffer manager pageout control (cumulative, inclusive) */ | 139 | /* log buffer manager pageout control (cumulative, inclusive) */ |
140 | #define lbmREAD 0x0001 | 140 | #define lbmREAD 0x0001 |
141 | #define lbmWRITE 0x0002 /* enqueue at tail of write queue; | 141 | #define lbmWRITE 0x0002 /* enqueue at tail of write queue; |
142 | * init pageout if at head of queue; | 142 | * init pageout if at head of queue; |
143 | */ | 143 | */ |
144 | #define lbmRELEASE 0x0004 /* remove from write queue | 144 | #define lbmRELEASE 0x0004 /* remove from write queue |
145 | * at completion of pageout; | 145 | * at completion of pageout; |
146 | * do not free/recycle it yet: | 146 | * do not free/recycle it yet: |
147 | * caller will free it; | 147 | * caller will free it; |
148 | */ | 148 | */ |
149 | #define lbmSYNC 0x0008 /* do not return to freelist | 149 | #define lbmSYNC 0x0008 /* do not return to freelist |
150 | * when removed from write queue; | 150 | * when removed from write queue; |
151 | */ | 151 | */ |
152 | #define lbmFREE 0x0010 /* return to freelist | 152 | #define lbmFREE 0x0010 /* return to freelist |
153 | * at completion of pageout; | 153 | * at completion of pageout; |
154 | * the buffer may be recycled; | 154 | * the buffer may be recycled; |
155 | */ | 155 | */ |
156 | #define lbmDONE 0x0020 | 156 | #define lbmDONE 0x0020 |
157 | #define lbmERROR 0x0040 | 157 | #define lbmERROR 0x0040 |
158 | #define lbmGC 0x0080 /* lbmIODone to perform post-GC processing | 158 | #define lbmGC 0x0080 /* lbmIODone to perform post-GC processing |
159 | * of log page | 159 | * of log page |
160 | */ | 160 | */ |
161 | #define lbmDIRECT 0x0100 | 161 | #define lbmDIRECT 0x0100 |
162 | 162 | ||
163 | /* | 163 | /* |
164 | * Global list of active external journals | 164 | * Global list of active external journals |
165 | */ | 165 | */ |
166 | static LIST_HEAD(jfs_external_logs); | 166 | static LIST_HEAD(jfs_external_logs); |
167 | static struct jfs_log *dummy_log = NULL; | 167 | static struct jfs_log *dummy_log = NULL; |
168 | static DECLARE_MUTEX(jfs_log_sem); | 168 | static DECLARE_MUTEX(jfs_log_sem); |
169 | 169 | ||
170 | /* | 170 | /* |
171 | * forward references | 171 | * forward references |
172 | */ | 172 | */ |
173 | static int lmWriteRecord(struct jfs_log * log, struct tblock * tblk, | 173 | static int lmWriteRecord(struct jfs_log * log, struct tblock * tblk, |
174 | struct lrd * lrd, struct tlock * tlck); | 174 | struct lrd * lrd, struct tlock * tlck); |
175 | 175 | ||
176 | static int lmNextPage(struct jfs_log * log); | 176 | static int lmNextPage(struct jfs_log * log); |
177 | static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi, | 177 | static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi, |
178 | int activate); | 178 | int activate); |
179 | 179 | ||
180 | static int open_inline_log(struct super_block *sb); | 180 | static int open_inline_log(struct super_block *sb); |
181 | static int open_dummy_log(struct super_block *sb); | 181 | static int open_dummy_log(struct super_block *sb); |
182 | static int lbmLogInit(struct jfs_log * log); | 182 | static int lbmLogInit(struct jfs_log * log); |
183 | static void lbmLogShutdown(struct jfs_log * log); | 183 | static void lbmLogShutdown(struct jfs_log * log); |
184 | static struct lbuf *lbmAllocate(struct jfs_log * log, int); | 184 | static struct lbuf *lbmAllocate(struct jfs_log * log, int); |
185 | static void lbmFree(struct lbuf * bp); | 185 | static void lbmFree(struct lbuf * bp); |
186 | static void lbmfree(struct lbuf * bp); | 186 | static void lbmfree(struct lbuf * bp); |
187 | static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp); | 187 | static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp); |
188 | static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag, int cant_block); | 188 | static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag, int cant_block); |
189 | static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag); | 189 | static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag); |
190 | static int lbmIOWait(struct lbuf * bp, int flag); | 190 | static int lbmIOWait(struct lbuf * bp, int flag); |
191 | static bio_end_io_t lbmIODone; | 191 | static bio_end_io_t lbmIODone; |
192 | static void lbmStartIO(struct lbuf * bp); | 192 | static void lbmStartIO(struct lbuf * bp); |
193 | static void lmGCwrite(struct jfs_log * log, int cant_block); | 193 | static void lmGCwrite(struct jfs_log * log, int cant_block); |
194 | static int lmLogSync(struct jfs_log * log, int nosyncwait); | 194 | static int lmLogSync(struct jfs_log * log, int hard_sync); |
195 | 195 | ||
196 | 196 | ||
197 | 197 | ||
198 | /* | 198 | /* |
199 | * statistics | 199 | * statistics |
200 | */ | 200 | */ |
201 | #ifdef CONFIG_JFS_STATISTICS | 201 | #ifdef CONFIG_JFS_STATISTICS |
202 | static struct lmStat { | 202 | static struct lmStat { |
203 | uint commit; /* # of commit */ | 203 | uint commit; /* # of commit */ |
204 | uint pagedone; /* # of page written */ | 204 | uint pagedone; /* # of page written */ |
205 | uint submitted; /* # of pages submitted */ | 205 | uint submitted; /* # of pages submitted */ |
206 | uint full_page; /* # of full pages submitted */ | 206 | uint full_page; /* # of full pages submitted */ |
207 | uint partial_page; /* # of partial pages submitted */ | 207 | uint partial_page; /* # of partial pages submitted */ |
208 | } lmStat; | 208 | } lmStat; |
209 | #endif | 209 | #endif |
210 | 210 | ||
211 | 211 | ||
212 | /* | 212 | /* |
213 | * NAME: lmLog() | 213 | * NAME: lmLog() |
214 | * | 214 | * |
215 | * FUNCTION: write a log record; | 215 | * FUNCTION: write a log record; |
216 | * | 216 | * |
217 | * PARAMETER: | 217 | * PARAMETER: |
218 | * | 218 | * |
219 | * RETURN: lsn - offset to the next log record to write (end-of-log); | 219 | * RETURN: lsn - offset to the next log record to write (end-of-log); |
220 | * -1 - error; | 220 | * -1 - error; |
221 | * | 221 | * |
222 | * note: todo: log error handler | 222 | * note: todo: log error handler |
223 | */ | 223 | */ |
224 | int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | 224 | int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, |
225 | struct tlock * tlck) | 225 | struct tlock * tlck) |
226 | { | 226 | { |
227 | int lsn; | 227 | int lsn; |
228 | int diffp, difft; | 228 | int diffp, difft; |
229 | struct metapage *mp = NULL; | 229 | struct metapage *mp = NULL; |
230 | unsigned long flags; | 230 | unsigned long flags; |
231 | 231 | ||
232 | jfs_info("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p", | 232 | jfs_info("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p", |
233 | log, tblk, lrd, tlck); | 233 | log, tblk, lrd, tlck); |
234 | 234 | ||
235 | LOG_LOCK(log); | 235 | LOG_LOCK(log); |
236 | 236 | ||
237 | /* log by (out-of-transaction) JFS ? */ | 237 | /* log by (out-of-transaction) JFS ? */ |
238 | if (tblk == NULL) | 238 | if (tblk == NULL) |
239 | goto writeRecord; | 239 | goto writeRecord; |
240 | 240 | ||
241 | /* log from page ? */ | 241 | /* log from page ? */ |
242 | if (tlck == NULL || | 242 | if (tlck == NULL || |
243 | tlck->type & tlckBTROOT || (mp = tlck->mp) == NULL) | 243 | tlck->type & tlckBTROOT || (mp = tlck->mp) == NULL) |
244 | goto writeRecord; | 244 | goto writeRecord; |
245 | 245 | ||
246 | /* | 246 | /* |
247 | * initialize/update page/transaction recovery lsn | 247 | * initialize/update page/transaction recovery lsn |
248 | */ | 248 | */ |
249 | lsn = log->lsn; | 249 | lsn = log->lsn; |
250 | 250 | ||
251 | LOGSYNC_LOCK(log, flags); | 251 | LOGSYNC_LOCK(log, flags); |
252 | 252 | ||
253 | /* | 253 | /* |
254 | * initialize page lsn if first log write of the page | 254 | * initialize page lsn if first log write of the page |
255 | */ | 255 | */ |
256 | if (mp->lsn == 0) { | 256 | if (mp->lsn == 0) { |
257 | mp->log = log; | 257 | mp->log = log; |
258 | mp->lsn = lsn; | 258 | mp->lsn = lsn; |
259 | log->count++; | 259 | log->count++; |
260 | 260 | ||
261 | /* insert page at tail of logsynclist */ | 261 | /* insert page at tail of logsynclist */ |
262 | list_add_tail(&mp->synclist, &log->synclist); | 262 | list_add_tail(&mp->synclist, &log->synclist); |
263 | } | 263 | } |
264 | 264 | ||
265 | /* | 265 | /* |
266 | * initialize/update lsn of tblock of the page | 266 | * initialize/update lsn of tblock of the page |
267 | * | 267 | * |
268 | * transaction inherits oldest lsn of pages associated | 268 | * transaction inherits oldest lsn of pages associated |
269 | * with allocation/deallocation of resources (their | 269 | * with allocation/deallocation of resources (their |
270 | * log records are used to reconstruct allocation map | 270 | * log records are used to reconstruct allocation map |
271 | * at recovery time: inode for inode allocation map, | 271 | * at recovery time: inode for inode allocation map, |
272 | * B+-tree index of extent descriptors for block | 272 | * B+-tree index of extent descriptors for block |
273 | * allocation map); | 273 | * allocation map); |
274 | * allocation map pages inherit transaction lsn at | 274 | * allocation map pages inherit transaction lsn at |
275 | * commit time to allow forwarding log syncpt past log | 275 | * commit time to allow forwarding log syncpt past log |
276 | * records associated with allocation/deallocation of | 276 | * records associated with allocation/deallocation of |
277 | * resources only after persistent map of these map pages | 277 | * resources only after persistent map of these map pages |
278 | * have been updated and propagated to home. | 278 | * have been updated and propagated to home. |
279 | */ | 279 | */ |
280 | /* | 280 | /* |
281 | * initialize transaction lsn: | 281 | * initialize transaction lsn: |
282 | */ | 282 | */ |
283 | if (tblk->lsn == 0) { | 283 | if (tblk->lsn == 0) { |
284 | /* inherit lsn of its first page logged */ | 284 | /* inherit lsn of its first page logged */ |
285 | tblk->lsn = mp->lsn; | 285 | tblk->lsn = mp->lsn; |
286 | log->count++; | 286 | log->count++; |
287 | 287 | ||
288 | /* insert tblock after the page on logsynclist */ | 288 | /* insert tblock after the page on logsynclist */ |
289 | list_add(&tblk->synclist, &mp->synclist); | 289 | list_add(&tblk->synclist, &mp->synclist); |
290 | } | 290 | } |
291 | /* | 291 | /* |
292 | * update transaction lsn: | 292 | * update transaction lsn: |
293 | */ | 293 | */ |
294 | else { | 294 | else { |
295 | /* inherit oldest/smallest lsn of page */ | 295 | /* inherit oldest/smallest lsn of page */ |
296 | logdiff(diffp, mp->lsn, log); | 296 | logdiff(diffp, mp->lsn, log); |
297 | logdiff(difft, tblk->lsn, log); | 297 | logdiff(difft, tblk->lsn, log); |
298 | if (diffp < difft) { | 298 | if (diffp < difft) { |
299 | /* update tblock lsn with page lsn */ | 299 | /* update tblock lsn with page lsn */ |
300 | tblk->lsn = mp->lsn; | 300 | tblk->lsn = mp->lsn; |
301 | 301 | ||
302 | /* move tblock after page on logsynclist */ | 302 | /* move tblock after page on logsynclist */ |
303 | list_move(&tblk->synclist, &mp->synclist); | 303 | list_move(&tblk->synclist, &mp->synclist); |
304 | } | 304 | } |
305 | } | 305 | } |
306 | 306 | ||
307 | LOGSYNC_UNLOCK(log, flags); | 307 | LOGSYNC_UNLOCK(log, flags); |
308 | 308 | ||
309 | /* | 309 | /* |
310 | * write the log record | 310 | * write the log record |
311 | */ | 311 | */ |
312 | writeRecord: | 312 | writeRecord: |
313 | lsn = lmWriteRecord(log, tblk, lrd, tlck); | 313 | lsn = lmWriteRecord(log, tblk, lrd, tlck); |
314 | 314 | ||
315 | /* | 315 | /* |
316 | * forward log syncpt if log reached next syncpt trigger | 316 | * forward log syncpt if log reached next syncpt trigger |
317 | */ | 317 | */ |
318 | logdiff(diffp, lsn, log); | 318 | logdiff(diffp, lsn, log); |
319 | if (diffp >= log->nextsync) | 319 | if (diffp >= log->nextsync) |
320 | lsn = lmLogSync(log, 0); | 320 | lsn = lmLogSync(log, 0); |
321 | 321 | ||
322 | /* update end-of-log lsn */ | 322 | /* update end-of-log lsn */ |
323 | log->lsn = lsn; | 323 | log->lsn = lsn; |
324 | 324 | ||
325 | LOG_UNLOCK(log); | 325 | LOG_UNLOCK(log); |
326 | 326 | ||
327 | /* return end-of-log address */ | 327 | /* return end-of-log address */ |
328 | return lsn; | 328 | return lsn; |
329 | } | 329 | } |
330 | 330 | ||
331 | /* | 331 | /* |
332 | * NAME: lmWriteRecord() | 332 | * NAME: lmWriteRecord() |
333 | * | 333 | * |
334 | * FUNCTION: move the log record to current log page | 334 | * FUNCTION: move the log record to current log page |
335 | * | 335 | * |
336 | * PARAMETER: cd - commit descriptor | 336 | * PARAMETER: cd - commit descriptor |
337 | * | 337 | * |
338 | * RETURN: end-of-log address | 338 | * RETURN: end-of-log address |
339 | * | 339 | * |
340 | * serialization: LOG_LOCK() held on entry/exit | 340 | * serialization: LOG_LOCK() held on entry/exit |
341 | */ | 341 | */ |
342 | static int | 342 | static int |
343 | lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | 343 | lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, |
344 | struct tlock * tlck) | 344 | struct tlock * tlck) |
345 | { | 345 | { |
346 | int lsn = 0; /* end-of-log address */ | 346 | int lsn = 0; /* end-of-log address */ |
347 | struct lbuf *bp; /* dst log page buffer */ | 347 | struct lbuf *bp; /* dst log page buffer */ |
348 | struct logpage *lp; /* dst log page */ | 348 | struct logpage *lp; /* dst log page */ |
349 | caddr_t dst; /* destination address in log page */ | 349 | caddr_t dst; /* destination address in log page */ |
350 | int dstoffset; /* end-of-log offset in log page */ | 350 | int dstoffset; /* end-of-log offset in log page */ |
351 | int freespace; /* free space in log page */ | 351 | int freespace; /* free space in log page */ |
352 | caddr_t p; /* src meta-data page */ | 352 | caddr_t p; /* src meta-data page */ |
353 | caddr_t src; | 353 | caddr_t src; |
354 | int srclen; | 354 | int srclen; |
355 | int nbytes; /* number of bytes to move */ | 355 | int nbytes; /* number of bytes to move */ |
356 | int i; | 356 | int i; |
357 | int len; | 357 | int len; |
358 | struct linelock *linelock; | 358 | struct linelock *linelock; |
359 | struct lv *lv; | 359 | struct lv *lv; |
360 | struct lvd *lvd; | 360 | struct lvd *lvd; |
361 | int l2linesize; | 361 | int l2linesize; |
362 | 362 | ||
363 | len = 0; | 363 | len = 0; |
364 | 364 | ||
365 | /* retrieve destination log page to write */ | 365 | /* retrieve destination log page to write */ |
366 | bp = (struct lbuf *) log->bp; | 366 | bp = (struct lbuf *) log->bp; |
367 | lp = (struct logpage *) bp->l_ldata; | 367 | lp = (struct logpage *) bp->l_ldata; |
368 | dstoffset = log->eor; | 368 | dstoffset = log->eor; |
369 | 369 | ||
370 | /* any log data to write ? */ | 370 | /* any log data to write ? */ |
371 | if (tlck == NULL) | 371 | if (tlck == NULL) |
372 | goto moveLrd; | 372 | goto moveLrd; |
373 | 373 | ||
374 | /* | 374 | /* |
375 | * move log record data | 375 | * move log record data |
376 | */ | 376 | */ |
377 | /* retrieve source meta-data page to log */ | 377 | /* retrieve source meta-data page to log */ |
378 | if (tlck->flag & tlckPAGELOCK) { | 378 | if (tlck->flag & tlckPAGELOCK) { |
379 | p = (caddr_t) (tlck->mp->data); | 379 | p = (caddr_t) (tlck->mp->data); |
380 | linelock = (struct linelock *) & tlck->lock; | 380 | linelock = (struct linelock *) & tlck->lock; |
381 | } | 381 | } |
382 | /* retrieve source in-memory inode to log */ | 382 | /* retrieve source in-memory inode to log */ |
383 | else if (tlck->flag & tlckINODELOCK) { | 383 | else if (tlck->flag & tlckINODELOCK) { |
384 | if (tlck->type & tlckDTREE) | 384 | if (tlck->type & tlckDTREE) |
385 | p = (caddr_t) &JFS_IP(tlck->ip)->i_dtroot; | 385 | p = (caddr_t) &JFS_IP(tlck->ip)->i_dtroot; |
386 | else | 386 | else |
387 | p = (caddr_t) &JFS_IP(tlck->ip)->i_xtroot; | 387 | p = (caddr_t) &JFS_IP(tlck->ip)->i_xtroot; |
388 | linelock = (struct linelock *) & tlck->lock; | 388 | linelock = (struct linelock *) & tlck->lock; |
389 | } | 389 | } |
390 | #ifdef _JFS_WIP | 390 | #ifdef _JFS_WIP |
391 | else if (tlck->flag & tlckINLINELOCK) { | 391 | else if (tlck->flag & tlckINLINELOCK) { |
392 | 392 | ||
393 | inlinelock = (struct inlinelock *) & tlck; | 393 | inlinelock = (struct inlinelock *) & tlck; |
394 | p = (caddr_t) & inlinelock->pxd; | 394 | p = (caddr_t) & inlinelock->pxd; |
395 | linelock = (struct linelock *) & tlck; | 395 | linelock = (struct linelock *) & tlck; |
396 | } | 396 | } |
397 | #endif /* _JFS_WIP */ | 397 | #endif /* _JFS_WIP */ |
398 | else { | 398 | else { |
399 | jfs_err("lmWriteRecord: UFO tlck:0x%p", tlck); | 399 | jfs_err("lmWriteRecord: UFO tlck:0x%p", tlck); |
400 | return 0; /* Probably should trap */ | 400 | return 0; /* Probably should trap */ |
401 | } | 401 | } |
402 | l2linesize = linelock->l2linesize; | 402 | l2linesize = linelock->l2linesize; |
403 | 403 | ||
404 | moveData: | 404 | moveData: |
405 | ASSERT(linelock->index <= linelock->maxcnt); | 405 | ASSERT(linelock->index <= linelock->maxcnt); |
406 | 406 | ||
407 | lv = linelock->lv; | 407 | lv = linelock->lv; |
408 | for (i = 0; i < linelock->index; i++, lv++) { | 408 | for (i = 0; i < linelock->index; i++, lv++) { |
409 | if (lv->length == 0) | 409 | if (lv->length == 0) |
410 | continue; | 410 | continue; |
411 | 411 | ||
412 | /* is page full ? */ | 412 | /* is page full ? */ |
413 | if (dstoffset >= LOGPSIZE - LOGPTLRSIZE) { | 413 | if (dstoffset >= LOGPSIZE - LOGPTLRSIZE) { |
414 | /* page become full: move on to next page */ | 414 | /* page become full: move on to next page */ |
415 | lmNextPage(log); | 415 | lmNextPage(log); |
416 | 416 | ||
417 | bp = log->bp; | 417 | bp = log->bp; |
418 | lp = (struct logpage *) bp->l_ldata; | 418 | lp = (struct logpage *) bp->l_ldata; |
419 | dstoffset = LOGPHDRSIZE; | 419 | dstoffset = LOGPHDRSIZE; |
420 | } | 420 | } |
421 | 421 | ||
422 | /* | 422 | /* |
423 | * move log vector data | 423 | * move log vector data |
424 | */ | 424 | */ |
425 | src = (u8 *) p + (lv->offset << l2linesize); | 425 | src = (u8 *) p + (lv->offset << l2linesize); |
426 | srclen = lv->length << l2linesize; | 426 | srclen = lv->length << l2linesize; |
427 | len += srclen; | 427 | len += srclen; |
428 | while (srclen > 0) { | 428 | while (srclen > 0) { |
429 | freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset; | 429 | freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset; |
430 | nbytes = min(freespace, srclen); | 430 | nbytes = min(freespace, srclen); |
431 | dst = (caddr_t) lp + dstoffset; | 431 | dst = (caddr_t) lp + dstoffset; |
432 | memcpy(dst, src, nbytes); | 432 | memcpy(dst, src, nbytes); |
433 | dstoffset += nbytes; | 433 | dstoffset += nbytes; |
434 | 434 | ||
435 | /* is page not full ? */ | 435 | /* is page not full ? */ |
436 | if (dstoffset < LOGPSIZE - LOGPTLRSIZE) | 436 | if (dstoffset < LOGPSIZE - LOGPTLRSIZE) |
437 | break; | 437 | break; |
438 | 438 | ||
439 | /* page become full: move on to next page */ | 439 | /* page become full: move on to next page */ |
440 | lmNextPage(log); | 440 | lmNextPage(log); |
441 | 441 | ||
442 | bp = (struct lbuf *) log->bp; | 442 | bp = (struct lbuf *) log->bp; |
443 | lp = (struct logpage *) bp->l_ldata; | 443 | lp = (struct logpage *) bp->l_ldata; |
444 | dstoffset = LOGPHDRSIZE; | 444 | dstoffset = LOGPHDRSIZE; |
445 | 445 | ||
446 | srclen -= nbytes; | 446 | srclen -= nbytes; |
447 | src += nbytes; | 447 | src += nbytes; |
448 | } | 448 | } |
449 | 449 | ||
450 | /* | 450 | /* |
451 | * move log vector descriptor | 451 | * move log vector descriptor |
452 | */ | 452 | */ |
453 | len += 4; | 453 | len += 4; |
454 | lvd = (struct lvd *) ((caddr_t) lp + dstoffset); | 454 | lvd = (struct lvd *) ((caddr_t) lp + dstoffset); |
455 | lvd->offset = cpu_to_le16(lv->offset); | 455 | lvd->offset = cpu_to_le16(lv->offset); |
456 | lvd->length = cpu_to_le16(lv->length); | 456 | lvd->length = cpu_to_le16(lv->length); |
457 | dstoffset += 4; | 457 | dstoffset += 4; |
458 | jfs_info("lmWriteRecord: lv offset:%d length:%d", | 458 | jfs_info("lmWriteRecord: lv offset:%d length:%d", |
459 | lv->offset, lv->length); | 459 | lv->offset, lv->length); |
460 | } | 460 | } |
461 | 461 | ||
462 | if ((i = linelock->next)) { | 462 | if ((i = linelock->next)) { |
463 | linelock = (struct linelock *) lid_to_tlock(i); | 463 | linelock = (struct linelock *) lid_to_tlock(i); |
464 | goto moveData; | 464 | goto moveData; |
465 | } | 465 | } |
466 | 466 | ||
467 | /* | 467 | /* |
468 | * move log record descriptor | 468 | * move log record descriptor |
469 | */ | 469 | */ |
470 | moveLrd: | 470 | moveLrd: |
471 | lrd->length = cpu_to_le16(len); | 471 | lrd->length = cpu_to_le16(len); |
472 | 472 | ||
473 | src = (caddr_t) lrd; | 473 | src = (caddr_t) lrd; |
474 | srclen = LOGRDSIZE; | 474 | srclen = LOGRDSIZE; |
475 | 475 | ||
476 | while (srclen > 0) { | 476 | while (srclen > 0) { |
477 | freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset; | 477 | freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset; |
478 | nbytes = min(freespace, srclen); | 478 | nbytes = min(freespace, srclen); |
479 | dst = (caddr_t) lp + dstoffset; | 479 | dst = (caddr_t) lp + dstoffset; |
480 | memcpy(dst, src, nbytes); | 480 | memcpy(dst, src, nbytes); |
481 | 481 | ||
482 | dstoffset += nbytes; | 482 | dstoffset += nbytes; |
483 | srclen -= nbytes; | 483 | srclen -= nbytes; |
484 | 484 | ||
485 | /* are there more to move than freespace of page ? */ | 485 | /* are there more to move than freespace of page ? */ |
486 | if (srclen) | 486 | if (srclen) |
487 | goto pageFull; | 487 | goto pageFull; |
488 | 488 | ||
489 | /* | 489 | /* |
490 | * end of log record descriptor | 490 | * end of log record descriptor |
491 | */ | 491 | */ |
492 | 492 | ||
493 | /* update last log record eor */ | 493 | /* update last log record eor */ |
494 | log->eor = dstoffset; | 494 | log->eor = dstoffset; |
495 | bp->l_eor = dstoffset; | 495 | bp->l_eor = dstoffset; |
496 | lsn = (log->page << L2LOGPSIZE) + dstoffset; | 496 | lsn = (log->page << L2LOGPSIZE) + dstoffset; |
497 | 497 | ||
498 | if (lrd->type & cpu_to_le16(LOG_COMMIT)) { | 498 | if (lrd->type & cpu_to_le16(LOG_COMMIT)) { |
499 | tblk->clsn = lsn; | 499 | tblk->clsn = lsn; |
500 | jfs_info("wr: tclsn:0x%x, beor:0x%x", tblk->clsn, | 500 | jfs_info("wr: tclsn:0x%x, beor:0x%x", tblk->clsn, |
501 | bp->l_eor); | 501 | bp->l_eor); |
502 | 502 | ||
503 | INCREMENT(lmStat.commit); /* # of commit */ | 503 | INCREMENT(lmStat.commit); /* # of commit */ |
504 | 504 | ||
505 | /* | 505 | /* |
506 | * enqueue tblock for group commit: | 506 | * enqueue tblock for group commit: |
507 | * | 507 | * |
508 | * enqueue tblock of non-trivial/synchronous COMMIT | 508 | * enqueue tblock of non-trivial/synchronous COMMIT |
509 | * at tail of group commit queue | 509 | * at tail of group commit queue |
510 | * (trivial/asynchronous COMMITs are ignored by | 510 | * (trivial/asynchronous COMMITs are ignored by |
511 | * group commit.) | 511 | * group commit.) |
512 | */ | 512 | */ |
513 | LOGGC_LOCK(log); | 513 | LOGGC_LOCK(log); |
514 | 514 | ||
515 | /* init tblock gc state */ | 515 | /* init tblock gc state */ |
516 | tblk->flag = tblkGC_QUEUE; | 516 | tblk->flag = tblkGC_QUEUE; |
517 | tblk->bp = log->bp; | 517 | tblk->bp = log->bp; |
518 | tblk->pn = log->page; | 518 | tblk->pn = log->page; |
519 | tblk->eor = log->eor; | 519 | tblk->eor = log->eor; |
520 | 520 | ||
521 | /* enqueue transaction to commit queue */ | 521 | /* enqueue transaction to commit queue */ |
522 | list_add_tail(&tblk->cqueue, &log->cqueue); | 522 | list_add_tail(&tblk->cqueue, &log->cqueue); |
523 | 523 | ||
524 | LOGGC_UNLOCK(log); | 524 | LOGGC_UNLOCK(log); |
525 | } | 525 | } |
526 | 526 | ||
527 | jfs_info("lmWriteRecord: lrd:0x%04x bp:0x%p pn:%d eor:0x%x", | 527 | jfs_info("lmWriteRecord: lrd:0x%04x bp:0x%p pn:%d eor:0x%x", |
528 | le16_to_cpu(lrd->type), log->bp, log->page, dstoffset); | 528 | le16_to_cpu(lrd->type), log->bp, log->page, dstoffset); |
529 | 529 | ||
530 | /* page not full ? */ | 530 | /* page not full ? */ |
531 | if (dstoffset < LOGPSIZE - LOGPTLRSIZE) | 531 | if (dstoffset < LOGPSIZE - LOGPTLRSIZE) |
532 | return lsn; | 532 | return lsn; |
533 | 533 | ||
534 | pageFull: | 534 | pageFull: |
535 | /* page become full: move on to next page */ | 535 | /* page become full: move on to next page */ |
536 | lmNextPage(log); | 536 | lmNextPage(log); |
537 | 537 | ||
538 | bp = (struct lbuf *) log->bp; | 538 | bp = (struct lbuf *) log->bp; |
539 | lp = (struct logpage *) bp->l_ldata; | 539 | lp = (struct logpage *) bp->l_ldata; |
540 | dstoffset = LOGPHDRSIZE; | 540 | dstoffset = LOGPHDRSIZE; |
541 | src += nbytes; | 541 | src += nbytes; |
542 | } | 542 | } |
543 | 543 | ||
544 | return lsn; | 544 | return lsn; |
545 | } | 545 | } |
546 | 546 | ||
547 | 547 | ||
548 | /* | 548 | /* |
549 | * NAME: lmNextPage() | 549 | * NAME: lmNextPage() |
550 | * | 550 | * |
551 | * FUNCTION: write current page and allocate next page. | 551 | * FUNCTION: write current page and allocate next page. |
552 | * | 552 | * |
553 | * PARAMETER: log | 553 | * PARAMETER: log |
554 | * | 554 | * |
555 | * RETURN: 0 | 555 | * RETURN: 0 |
556 | * | 556 | * |
557 | * serialization: LOG_LOCK() held on entry/exit | 557 | * serialization: LOG_LOCK() held on entry/exit |
558 | */ | 558 | */ |
559 | static int lmNextPage(struct jfs_log * log) | 559 | static int lmNextPage(struct jfs_log * log) |
560 | { | 560 | { |
561 | struct logpage *lp; | 561 | struct logpage *lp; |
562 | int lspn; /* log sequence page number */ | 562 | int lspn; /* log sequence page number */ |
563 | int pn; /* current page number */ | 563 | int pn; /* current page number */ |
564 | struct lbuf *bp; | 564 | struct lbuf *bp; |
565 | struct lbuf *nextbp; | 565 | struct lbuf *nextbp; |
566 | struct tblock *tblk; | 566 | struct tblock *tblk; |
567 | 567 | ||
568 | /* get current log page number and log sequence page number */ | 568 | /* get current log page number and log sequence page number */ |
569 | pn = log->page; | 569 | pn = log->page; |
570 | bp = log->bp; | 570 | bp = log->bp; |
571 | lp = (struct logpage *) bp->l_ldata; | 571 | lp = (struct logpage *) bp->l_ldata; |
572 | lspn = le32_to_cpu(lp->h.page); | 572 | lspn = le32_to_cpu(lp->h.page); |
573 | 573 | ||
574 | LOGGC_LOCK(log); | 574 | LOGGC_LOCK(log); |
575 | 575 | ||
576 | /* | 576 | /* |
577 | * write or queue the full page at the tail of write queue | 577 | * write or queue the full page at the tail of write queue |
578 | */ | 578 | */ |
579 | /* get the tail tblk on commit queue */ | 579 | /* get the tail tblk on commit queue */ |
580 | if (list_empty(&log->cqueue)) | 580 | if (list_empty(&log->cqueue)) |
581 | tblk = NULL; | 581 | tblk = NULL; |
582 | else | 582 | else |
583 | tblk = list_entry(log->cqueue.prev, struct tblock, cqueue); | 583 | tblk = list_entry(log->cqueue.prev, struct tblock, cqueue); |
584 | 584 | ||
585 | /* every tblk who has COMMIT record on the current page, | 585 | /* every tblk who has COMMIT record on the current page, |
586 | * and has not been committed, must be on commit queue | 586 | * and has not been committed, must be on commit queue |
587 | * since tblk is queued at commit queueu at the time | 587 | * since tblk is queued at commit queueu at the time |
588 | * of writing its COMMIT record on the page before | 588 | * of writing its COMMIT record on the page before |
589 | * page becomes full (even though the tblk thread | 589 | * page becomes full (even though the tblk thread |
590 | * who wrote COMMIT record may have been suspended | 590 | * who wrote COMMIT record may have been suspended |
591 | * currently); | 591 | * currently); |
592 | */ | 592 | */ |
593 | 593 | ||
594 | /* is page bound with outstanding tail tblk ? */ | 594 | /* is page bound with outstanding tail tblk ? */ |
595 | if (tblk && tblk->pn == pn) { | 595 | if (tblk && tblk->pn == pn) { |
596 | /* mark tblk for end-of-page */ | 596 | /* mark tblk for end-of-page */ |
597 | tblk->flag |= tblkGC_EOP; | 597 | tblk->flag |= tblkGC_EOP; |
598 | 598 | ||
599 | if (log->cflag & logGC_PAGEOUT) { | 599 | if (log->cflag & logGC_PAGEOUT) { |
600 | /* if page is not already on write queue, | 600 | /* if page is not already on write queue, |
601 | * just enqueue (no lbmWRITE to prevent redrive) | 601 | * just enqueue (no lbmWRITE to prevent redrive) |
602 | * buffer to wqueue to ensure correct serial order | 602 | * buffer to wqueue to ensure correct serial order |
603 | * of the pages since log pages will be added | 603 | * of the pages since log pages will be added |
604 | * continuously | 604 | * continuously |
605 | */ | 605 | */ |
606 | if (bp->l_wqnext == NULL) | 606 | if (bp->l_wqnext == NULL) |
607 | lbmWrite(log, bp, 0, 0); | 607 | lbmWrite(log, bp, 0, 0); |
608 | } else { | 608 | } else { |
609 | /* | 609 | /* |
610 | * No current GC leader, initiate group commit | 610 | * No current GC leader, initiate group commit |
611 | */ | 611 | */ |
612 | log->cflag |= logGC_PAGEOUT; | 612 | log->cflag |= logGC_PAGEOUT; |
613 | lmGCwrite(log, 0); | 613 | lmGCwrite(log, 0); |
614 | } | 614 | } |
615 | } | 615 | } |
616 | /* page is not bound with outstanding tblk: | 616 | /* page is not bound with outstanding tblk: |
617 | * init write or mark it to be redriven (lbmWRITE) | 617 | * init write or mark it to be redriven (lbmWRITE) |
618 | */ | 618 | */ |
619 | else { | 619 | else { |
620 | /* finalize the page */ | 620 | /* finalize the page */ |
621 | bp->l_ceor = bp->l_eor; | 621 | bp->l_ceor = bp->l_eor; |
622 | lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor); | 622 | lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor); |
623 | lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, 0); | 623 | lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, 0); |
624 | } | 624 | } |
625 | LOGGC_UNLOCK(log); | 625 | LOGGC_UNLOCK(log); |
626 | 626 | ||
627 | /* | 627 | /* |
628 | * allocate/initialize next page | 628 | * allocate/initialize next page |
629 | */ | 629 | */ |
630 | /* if log wraps, the first data page of log is 2 | 630 | /* if log wraps, the first data page of log is 2 |
631 | * (0 never used, 1 is superblock). | 631 | * (0 never used, 1 is superblock). |
632 | */ | 632 | */ |
633 | log->page = (pn == log->size - 1) ? 2 : pn + 1; | 633 | log->page = (pn == log->size - 1) ? 2 : pn + 1; |
634 | log->eor = LOGPHDRSIZE; /* ? valid page empty/full at logRedo() */ | 634 | log->eor = LOGPHDRSIZE; /* ? valid page empty/full at logRedo() */ |
635 | 635 | ||
636 | /* allocate/initialize next log page buffer */ | 636 | /* allocate/initialize next log page buffer */ |
637 | nextbp = lbmAllocate(log, log->page); | 637 | nextbp = lbmAllocate(log, log->page); |
638 | nextbp->l_eor = log->eor; | 638 | nextbp->l_eor = log->eor; |
639 | log->bp = nextbp; | 639 | log->bp = nextbp; |
640 | 640 | ||
641 | /* initialize next log page */ | 641 | /* initialize next log page */ |
642 | lp = (struct logpage *) nextbp->l_ldata; | 642 | lp = (struct logpage *) nextbp->l_ldata; |
643 | lp->h.page = lp->t.page = cpu_to_le32(lspn + 1); | 643 | lp->h.page = lp->t.page = cpu_to_le32(lspn + 1); |
644 | lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE); | 644 | lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE); |
645 | 645 | ||
646 | return 0; | 646 | return 0; |
647 | } | 647 | } |
648 | 648 | ||
649 | 649 | ||
650 | /* | 650 | /* |
651 | * NAME: lmGroupCommit() | 651 | * NAME: lmGroupCommit() |
652 | * | 652 | * |
653 | * FUNCTION: group commit | 653 | * FUNCTION: group commit |
654 | * initiate pageout of the pages with COMMIT in the order of | 654 | * initiate pageout of the pages with COMMIT in the order of |
655 | * page number - redrive pageout of the page at the head of | 655 | * page number - redrive pageout of the page at the head of |
656 | * pageout queue until full page has been written. | 656 | * pageout queue until full page has been written. |
657 | * | 657 | * |
658 | * RETURN: | 658 | * RETURN: |
659 | * | 659 | * |
660 | * NOTE: | 660 | * NOTE: |
661 | * LOGGC_LOCK serializes log group commit queue, and | 661 | * LOGGC_LOCK serializes log group commit queue, and |
662 | * transaction blocks on the commit queue. | 662 | * transaction blocks on the commit queue. |
663 | * N.B. LOG_LOCK is NOT held during lmGroupCommit(). | 663 | * N.B. LOG_LOCK is NOT held during lmGroupCommit(). |
664 | */ | 664 | */ |
665 | int lmGroupCommit(struct jfs_log * log, struct tblock * tblk) | 665 | int lmGroupCommit(struct jfs_log * log, struct tblock * tblk) |
666 | { | 666 | { |
667 | int rc = 0; | 667 | int rc = 0; |
668 | 668 | ||
669 | LOGGC_LOCK(log); | 669 | LOGGC_LOCK(log); |
670 | 670 | ||
671 | /* group committed already ? */ | 671 | /* group committed already ? */ |
672 | if (tblk->flag & tblkGC_COMMITTED) { | 672 | if (tblk->flag & tblkGC_COMMITTED) { |
673 | if (tblk->flag & tblkGC_ERROR) | 673 | if (tblk->flag & tblkGC_ERROR) |
674 | rc = -EIO; | 674 | rc = -EIO; |
675 | 675 | ||
676 | LOGGC_UNLOCK(log); | 676 | LOGGC_UNLOCK(log); |
677 | return rc; | 677 | return rc; |
678 | } | 678 | } |
679 | jfs_info("lmGroup Commit: tblk = 0x%p, gcrtc = %d", tblk, log->gcrtc); | 679 | jfs_info("lmGroup Commit: tblk = 0x%p, gcrtc = %d", tblk, log->gcrtc); |
680 | 680 | ||
681 | if (tblk->xflag & COMMIT_LAZY) | 681 | if (tblk->xflag & COMMIT_LAZY) |
682 | tblk->flag |= tblkGC_LAZY; | 682 | tblk->flag |= tblkGC_LAZY; |
683 | 683 | ||
684 | if ((!(log->cflag & logGC_PAGEOUT)) && (!list_empty(&log->cqueue)) && | 684 | if ((!(log->cflag & logGC_PAGEOUT)) && (!list_empty(&log->cqueue)) && |
685 | (!(tblk->xflag & COMMIT_LAZY) || test_bit(log_FLUSH, &log->flag) | 685 | (!(tblk->xflag & COMMIT_LAZY) || test_bit(log_FLUSH, &log->flag) |
686 | || jfs_tlocks_low)) { | 686 | || jfs_tlocks_low)) { |
687 | /* | 687 | /* |
688 | * No pageout in progress | 688 | * No pageout in progress |
689 | * | 689 | * |
690 | * start group commit as its group leader. | 690 | * start group commit as its group leader. |
691 | */ | 691 | */ |
692 | log->cflag |= logGC_PAGEOUT; | 692 | log->cflag |= logGC_PAGEOUT; |
693 | 693 | ||
694 | lmGCwrite(log, 0); | 694 | lmGCwrite(log, 0); |
695 | } | 695 | } |
696 | 696 | ||
697 | if (tblk->xflag & COMMIT_LAZY) { | 697 | if (tblk->xflag & COMMIT_LAZY) { |
698 | /* | 698 | /* |
699 | * Lazy transactions can leave now | 699 | * Lazy transactions can leave now |
700 | */ | 700 | */ |
701 | LOGGC_UNLOCK(log); | 701 | LOGGC_UNLOCK(log); |
702 | return 0; | 702 | return 0; |
703 | } | 703 | } |
704 | 704 | ||
705 | /* lmGCwrite gives up LOGGC_LOCK, check again */ | 705 | /* lmGCwrite gives up LOGGC_LOCK, check again */ |
706 | 706 | ||
707 | if (tblk->flag & tblkGC_COMMITTED) { | 707 | if (tblk->flag & tblkGC_COMMITTED) { |
708 | if (tblk->flag & tblkGC_ERROR) | 708 | if (tblk->flag & tblkGC_ERROR) |
709 | rc = -EIO; | 709 | rc = -EIO; |
710 | 710 | ||
711 | LOGGC_UNLOCK(log); | 711 | LOGGC_UNLOCK(log); |
712 | return rc; | 712 | return rc; |
713 | } | 713 | } |
714 | 714 | ||
715 | /* upcount transaction waiting for completion | 715 | /* upcount transaction waiting for completion |
716 | */ | 716 | */ |
717 | log->gcrtc++; | 717 | log->gcrtc++; |
718 | tblk->flag |= tblkGC_READY; | 718 | tblk->flag |= tblkGC_READY; |
719 | 719 | ||
720 | __SLEEP_COND(tblk->gcwait, (tblk->flag & tblkGC_COMMITTED), | 720 | __SLEEP_COND(tblk->gcwait, (tblk->flag & tblkGC_COMMITTED), |
721 | LOGGC_LOCK(log), LOGGC_UNLOCK(log)); | 721 | LOGGC_LOCK(log), LOGGC_UNLOCK(log)); |
722 | 722 | ||
723 | /* removed from commit queue */ | 723 | /* removed from commit queue */ |
724 | if (tblk->flag & tblkGC_ERROR) | 724 | if (tblk->flag & tblkGC_ERROR) |
725 | rc = -EIO; | 725 | rc = -EIO; |
726 | 726 | ||
727 | LOGGC_UNLOCK(log); | 727 | LOGGC_UNLOCK(log); |
728 | return rc; | 728 | return rc; |
729 | } | 729 | } |
730 | 730 | ||
731 | /* | 731 | /* |
732 | * NAME: lmGCwrite() | 732 | * NAME: lmGCwrite() |
733 | * | 733 | * |
734 | * FUNCTION: group commit write | 734 | * FUNCTION: group commit write |
735 | * initiate write of log page, building a group of all transactions | 735 | * initiate write of log page, building a group of all transactions |
736 | * with commit records on that page. | 736 | * with commit records on that page. |
737 | * | 737 | * |
738 | * RETURN: None | 738 | * RETURN: None |
739 | * | 739 | * |
740 | * NOTE: | 740 | * NOTE: |
741 | * LOGGC_LOCK must be held by caller. | 741 | * LOGGC_LOCK must be held by caller. |
742 | * N.B. LOG_LOCK is NOT held during lmGroupCommit(). | 742 | * N.B. LOG_LOCK is NOT held during lmGroupCommit(). |
743 | */ | 743 | */ |
744 | static void lmGCwrite(struct jfs_log * log, int cant_write) | 744 | static void lmGCwrite(struct jfs_log * log, int cant_write) |
745 | { | 745 | { |
746 | struct lbuf *bp; | 746 | struct lbuf *bp; |
747 | struct logpage *lp; | 747 | struct logpage *lp; |
748 | int gcpn; /* group commit page number */ | 748 | int gcpn; /* group commit page number */ |
749 | struct tblock *tblk; | 749 | struct tblock *tblk; |
750 | struct tblock *xtblk = NULL; | 750 | struct tblock *xtblk = NULL; |
751 | 751 | ||
752 | /* | 752 | /* |
753 | * build the commit group of a log page | 753 | * build the commit group of a log page |
754 | * | 754 | * |
755 | * scan commit queue and make a commit group of all | 755 | * scan commit queue and make a commit group of all |
756 | * transactions with COMMIT records on the same log page. | 756 | * transactions with COMMIT records on the same log page. |
757 | */ | 757 | */ |
758 | /* get the head tblk on the commit queue */ | 758 | /* get the head tblk on the commit queue */ |
759 | gcpn = list_entry(log->cqueue.next, struct tblock, cqueue)->pn; | 759 | gcpn = list_entry(log->cqueue.next, struct tblock, cqueue)->pn; |
760 | 760 | ||
761 | list_for_each_entry(tblk, &log->cqueue, cqueue) { | 761 | list_for_each_entry(tblk, &log->cqueue, cqueue) { |
762 | if (tblk->pn != gcpn) | 762 | if (tblk->pn != gcpn) |
763 | break; | 763 | break; |
764 | 764 | ||
765 | xtblk = tblk; | 765 | xtblk = tblk; |
766 | 766 | ||
767 | /* state transition: (QUEUE, READY) -> COMMIT */ | 767 | /* state transition: (QUEUE, READY) -> COMMIT */ |
768 | tblk->flag |= tblkGC_COMMIT; | 768 | tblk->flag |= tblkGC_COMMIT; |
769 | } | 769 | } |
770 | tblk = xtblk; /* last tblk of the page */ | 770 | tblk = xtblk; /* last tblk of the page */ |
771 | 771 | ||
772 | /* | 772 | /* |
773 | * pageout to commit transactions on the log page. | 773 | * pageout to commit transactions on the log page. |
774 | */ | 774 | */ |
775 | bp = (struct lbuf *) tblk->bp; | 775 | bp = (struct lbuf *) tblk->bp; |
776 | lp = (struct logpage *) bp->l_ldata; | 776 | lp = (struct logpage *) bp->l_ldata; |
777 | /* is page already full ? */ | 777 | /* is page already full ? */ |
778 | if (tblk->flag & tblkGC_EOP) { | 778 | if (tblk->flag & tblkGC_EOP) { |
779 | /* mark page to free at end of group commit of the page */ | 779 | /* mark page to free at end of group commit of the page */ |
780 | tblk->flag &= ~tblkGC_EOP; | 780 | tblk->flag &= ~tblkGC_EOP; |
781 | tblk->flag |= tblkGC_FREE; | 781 | tblk->flag |= tblkGC_FREE; |
782 | bp->l_ceor = bp->l_eor; | 782 | bp->l_ceor = bp->l_eor; |
783 | lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor); | 783 | lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor); |
784 | lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmGC, | 784 | lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmGC, |
785 | cant_write); | 785 | cant_write); |
786 | INCREMENT(lmStat.full_page); | 786 | INCREMENT(lmStat.full_page); |
787 | } | 787 | } |
788 | /* page is not yet full */ | 788 | /* page is not yet full */ |
789 | else { | 789 | else { |
790 | bp->l_ceor = tblk->eor; /* ? bp->l_ceor = bp->l_eor; */ | 790 | bp->l_ceor = tblk->eor; /* ? bp->l_ceor = bp->l_eor; */ |
791 | lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor); | 791 | lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor); |
792 | lbmWrite(log, bp, lbmWRITE | lbmGC, cant_write); | 792 | lbmWrite(log, bp, lbmWRITE | lbmGC, cant_write); |
793 | INCREMENT(lmStat.partial_page); | 793 | INCREMENT(lmStat.partial_page); |
794 | } | 794 | } |
795 | } | 795 | } |
796 | 796 | ||
797 | /* | 797 | /* |
798 | * NAME: lmPostGC() | 798 | * NAME: lmPostGC() |
799 | * | 799 | * |
800 | * FUNCTION: group commit post-processing | 800 | * FUNCTION: group commit post-processing |
801 | * Processes transactions after their commit records have been written | 801 | * Processes transactions after their commit records have been written |
802 | * to disk, redriving log I/O if necessary. | 802 | * to disk, redriving log I/O if necessary. |
803 | * | 803 | * |
804 | * RETURN: None | 804 | * RETURN: None |
805 | * | 805 | * |
806 | * NOTE: | 806 | * NOTE: |
807 | * This routine is called a interrupt time by lbmIODone | 807 | * This routine is called a interrupt time by lbmIODone |
808 | */ | 808 | */ |
809 | static void lmPostGC(struct lbuf * bp) | 809 | static void lmPostGC(struct lbuf * bp) |
810 | { | 810 | { |
811 | unsigned long flags; | 811 | unsigned long flags; |
812 | struct jfs_log *log = bp->l_log; | 812 | struct jfs_log *log = bp->l_log; |
813 | struct logpage *lp; | 813 | struct logpage *lp; |
814 | struct tblock *tblk, *temp; | 814 | struct tblock *tblk, *temp; |
815 | 815 | ||
816 | //LOGGC_LOCK(log); | 816 | //LOGGC_LOCK(log); |
817 | spin_lock_irqsave(&log->gclock, flags); | 817 | spin_lock_irqsave(&log->gclock, flags); |
818 | /* | 818 | /* |
819 | * current pageout of group commit completed. | 819 | * current pageout of group commit completed. |
820 | * | 820 | * |
821 | * remove/wakeup transactions from commit queue who were | 821 | * remove/wakeup transactions from commit queue who were |
822 | * group committed with the current log page | 822 | * group committed with the current log page |
823 | */ | 823 | */ |
824 | list_for_each_entry_safe(tblk, temp, &log->cqueue, cqueue) { | 824 | list_for_each_entry_safe(tblk, temp, &log->cqueue, cqueue) { |
825 | if (!(tblk->flag & tblkGC_COMMIT)) | 825 | if (!(tblk->flag & tblkGC_COMMIT)) |
826 | break; | 826 | break; |
827 | /* if transaction was marked GC_COMMIT then | 827 | /* if transaction was marked GC_COMMIT then |
828 | * it has been shipped in the current pageout | 828 | * it has been shipped in the current pageout |
829 | * and made it to disk - it is committed. | 829 | * and made it to disk - it is committed. |
830 | */ | 830 | */ |
831 | 831 | ||
832 | if (bp->l_flag & lbmERROR) | 832 | if (bp->l_flag & lbmERROR) |
833 | tblk->flag |= tblkGC_ERROR; | 833 | tblk->flag |= tblkGC_ERROR; |
834 | 834 | ||
835 | /* remove it from the commit queue */ | 835 | /* remove it from the commit queue */ |
836 | list_del(&tblk->cqueue); | 836 | list_del(&tblk->cqueue); |
837 | tblk->flag &= ~tblkGC_QUEUE; | 837 | tblk->flag &= ~tblkGC_QUEUE; |
838 | 838 | ||
839 | if (tblk == log->flush_tblk) { | 839 | if (tblk == log->flush_tblk) { |
840 | /* we can stop flushing the log now */ | 840 | /* we can stop flushing the log now */ |
841 | clear_bit(log_FLUSH, &log->flag); | 841 | clear_bit(log_FLUSH, &log->flag); |
842 | log->flush_tblk = NULL; | 842 | log->flush_tblk = NULL; |
843 | } | 843 | } |
844 | 844 | ||
845 | jfs_info("lmPostGC: tblk = 0x%p, flag = 0x%x", tblk, | 845 | jfs_info("lmPostGC: tblk = 0x%p, flag = 0x%x", tblk, |
846 | tblk->flag); | 846 | tblk->flag); |
847 | 847 | ||
848 | if (!(tblk->xflag & COMMIT_FORCE)) | 848 | if (!(tblk->xflag & COMMIT_FORCE)) |
849 | /* | 849 | /* |
850 | * Hand tblk over to lazy commit thread | 850 | * Hand tblk over to lazy commit thread |
851 | */ | 851 | */ |
852 | txLazyUnlock(tblk); | 852 | txLazyUnlock(tblk); |
853 | else { | 853 | else { |
854 | /* state transition: COMMIT -> COMMITTED */ | 854 | /* state transition: COMMIT -> COMMITTED */ |
855 | tblk->flag |= tblkGC_COMMITTED; | 855 | tblk->flag |= tblkGC_COMMITTED; |
856 | 856 | ||
857 | if (tblk->flag & tblkGC_READY) | 857 | if (tblk->flag & tblkGC_READY) |
858 | log->gcrtc--; | 858 | log->gcrtc--; |
859 | 859 | ||
860 | LOGGC_WAKEUP(tblk); | 860 | LOGGC_WAKEUP(tblk); |
861 | } | 861 | } |
862 | 862 | ||
863 | /* was page full before pageout ? | 863 | /* was page full before pageout ? |
864 | * (and this is the last tblk bound with the page) | 864 | * (and this is the last tblk bound with the page) |
865 | */ | 865 | */ |
866 | if (tblk->flag & tblkGC_FREE) | 866 | if (tblk->flag & tblkGC_FREE) |
867 | lbmFree(bp); | 867 | lbmFree(bp); |
868 | /* did page become full after pageout ? | 868 | /* did page become full after pageout ? |
869 | * (and this is the last tblk bound with the page) | 869 | * (and this is the last tblk bound with the page) |
870 | */ | 870 | */ |
871 | else if (tblk->flag & tblkGC_EOP) { | 871 | else if (tblk->flag & tblkGC_EOP) { |
872 | /* finalize the page */ | 872 | /* finalize the page */ |
873 | lp = (struct logpage *) bp->l_ldata; | 873 | lp = (struct logpage *) bp->l_ldata; |
874 | bp->l_ceor = bp->l_eor; | 874 | bp->l_ceor = bp->l_eor; |
875 | lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor); | 875 | lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor); |
876 | jfs_info("lmPostGC: calling lbmWrite"); | 876 | jfs_info("lmPostGC: calling lbmWrite"); |
877 | lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, | 877 | lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, |
878 | 1); | 878 | 1); |
879 | } | 879 | } |
880 | 880 | ||
881 | } | 881 | } |
882 | 882 | ||
883 | /* are there any transactions who have entered lnGroupCommit() | 883 | /* are there any transactions who have entered lnGroupCommit() |
884 | * (whose COMMITs are after that of the last log page written. | 884 | * (whose COMMITs are after that of the last log page written. |
885 | * They are waiting for new group commit (above at (SLEEP 1)) | 885 | * They are waiting for new group commit (above at (SLEEP 1)) |
886 | * or lazy transactions are on a full (queued) log page, | 886 | * or lazy transactions are on a full (queued) log page, |
887 | * select the latest ready transaction as new group leader and | 887 | * select the latest ready transaction as new group leader and |
888 | * wake her up to lead her group. | 888 | * wake her up to lead her group. |
889 | */ | 889 | */ |
890 | if ((!list_empty(&log->cqueue)) && | 890 | if ((!list_empty(&log->cqueue)) && |
891 | ((log->gcrtc > 0) || (tblk->bp->l_wqnext != NULL) || | 891 | ((log->gcrtc > 0) || (tblk->bp->l_wqnext != NULL) || |
892 | test_bit(log_FLUSH, &log->flag) || jfs_tlocks_low)) | 892 | test_bit(log_FLUSH, &log->flag) || jfs_tlocks_low)) |
893 | /* | 893 | /* |
894 | * Call lmGCwrite with new group leader | 894 | * Call lmGCwrite with new group leader |
895 | */ | 895 | */ |
896 | lmGCwrite(log, 1); | 896 | lmGCwrite(log, 1); |
897 | 897 | ||
898 | /* no transactions are ready yet (transactions are only just | 898 | /* no transactions are ready yet (transactions are only just |
899 | * queued (GC_QUEUE) and not entered for group commit yet). | 899 | * queued (GC_QUEUE) and not entered for group commit yet). |
900 | * the first transaction entering group commit | 900 | * the first transaction entering group commit |
901 | * will elect itself as the new group leader. | 901 | * will elect itself as the new group leader. |
902 | */ | 902 | */ |
903 | else | 903 | else |
904 | log->cflag &= ~logGC_PAGEOUT; | 904 | log->cflag &= ~logGC_PAGEOUT; |
905 | 905 | ||
906 | //LOGGC_UNLOCK(log); | 906 | //LOGGC_UNLOCK(log); |
907 | spin_unlock_irqrestore(&log->gclock, flags); | 907 | spin_unlock_irqrestore(&log->gclock, flags); |
908 | return; | 908 | return; |
909 | } | 909 | } |
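The hand-off at the end of lmPostGC() boils down to one predicate: keep driving group commit while the commit queue is non-empty and there is either a waiting committer, a queued full page, an active flush, or tlock pressure; otherwise drop logGC_PAGEOUT and let the next committer elect itself leader. A minimal userspace restatement (the struct and helper names are invented for illustration; this is not kernel code):

#include <stdbool.h>
#include <stdio.h>

/* Simplified model of the hand-off decision at the end of lmPostGC() */
struct model_log {
	bool cqueue_empty;	/* list_empty(&log->cqueue) */
	int  gcrtc;		/* committers waiting in group commit */
	bool page_queued;	/* tblk->bp->l_wqnext != NULL */
	bool flushing;		/* test_bit(log_FLUSH, &log->flag) */
	bool tlocks_low;	/* jfs_tlocks_low */
};

static bool drive_next_group_write(const struct model_log *log)
{
	return !log->cqueue_empty &&
	       (log->gcrtc > 0 || log->page_queued ||
		log->flushing || log->tlocks_low);
}

int main(void)
{
	struct model_log l = { .cqueue_empty = false, .gcrtc = 1 };

	/* true: call lmGCwrite() again; false: clear logGC_PAGEOUT */
	printf("drive another lmGCwrite: %s\n",
	       drive_next_group_write(&l) ? "yes" : "no");
	return 0;
}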
910 | 910 | ||
911 | /* | 911 | /* |
912 | * NAME: lmLogSync() | 912 | * NAME: lmLogSync() |
913 | * | 913 | * |
914 | * FUNCTION: write log SYNCPT record for specified log | 914 | * FUNCTION: write log SYNCPT record for specified log |
915 | * if new sync address is available | 915 | * if new sync address is available |
916 | * (normally the case if sync() is executed by background | 916 | * (normally the case if sync() is executed by background |
917 | * process). | 917 | * process). |
918 | * if not, explicitly run jfs_blogsync() to initiate | ||
919 | * getting of new sync address. | ||
920 | * calculate new value of i_nextsync which determines when | 918 | * calculate new value of i_nextsync which determines when |
921 | * this code is called again. | 919 | * this code is called again. |
922 | * | 920 | * |
923 | * PARAMETERS: log - log structure | 921 | * PARAMETERS: log - log structure |
924 | * nosyncwait - 1 if called asynchronously | 922 | * hard_sync - 1 to force all metadata to be written |
925 | * | 923 | * |
926 | * RETURN: 0 | 924 | * RETURN: 0 |
927 | * | 925 | * |
928 | * serialization: LOG_LOCK() held on entry/exit | 926 | * serialization: LOG_LOCK() held on entry/exit |
929 | */ | 927 | */ |
930 | static int lmLogSync(struct jfs_log * log, int nosyncwait) | 928 | static int lmLogSync(struct jfs_log * log, int hard_sync) |
931 | { | 929 | { |
932 | int logsize; | 930 | int logsize; |
933 | int written; /* written since last syncpt */ | 931 | int written; /* written since last syncpt */ |
934 | int free; /* free space left available */ | 932 | int free; /* free space left available */ |
935 | int delta; /* additional delta to write normally */ | 933 | int delta; /* additional delta to write normally */ |
936 | int more; /* additional write granted */ | 934 | int more; /* additional write granted */ |
937 | struct lrd lrd; | 935 | struct lrd lrd; |
938 | int lsn; | 936 | int lsn; |
939 | struct logsyncblk *lp; | 937 | struct logsyncblk *lp; |
940 | struct jfs_sb_info *sbi; | 938 | struct jfs_sb_info *sbi; |
941 | unsigned long flags; | 939 | unsigned long flags; |
942 | 940 | ||
943 | /* push dirty metapages out to disk */ | 941 | /* push dirty metapages out to disk */ |
944 | list_for_each_entry(sbi, &log->sb_list, log_list) { | 942 | if (hard_sync) |
945 | filemap_flush(sbi->ipbmap->i_mapping); | 943 | list_for_each_entry(sbi, &log->sb_list, log_list) { |
946 | filemap_flush(sbi->ipimap->i_mapping); | 944 | filemap_fdatawrite(sbi->ipbmap->i_mapping); |
947 | filemap_flush(sbi->direct_inode->i_mapping); | 945 | filemap_fdatawrite(sbi->ipimap->i_mapping); |
948 | } | 946 | filemap_fdatawrite(sbi->direct_inode->i_mapping); |
947 | } | ||
948 | else | ||
949 | list_for_each_entry(sbi, &log->sb_list, log_list) { | ||
950 | filemap_flush(sbi->ipbmap->i_mapping); | ||
951 | filemap_flush(sbi->ipimap->i_mapping); | ||
952 | filemap_flush(sbi->direct_inode->i_mapping); | ||
953 | } | ||
949 | 954 | ||
950 | /* | 955 | /* |
951 | * forward syncpt | 956 | * forward syncpt |
952 | */ | 957 | */ |
953 | /* if last sync is same as last syncpt, | 958 | /* if last sync is same as last syncpt, |
954 | * invoke sync point forward processing to update sync. | 959 | * invoke sync point forward processing to update sync. |
955 | */ | 960 | */ |
956 | 961 | ||
957 | if (log->sync == log->syncpt) { | 962 | if (log->sync == log->syncpt) { |
958 | LOGSYNC_LOCK(log, flags); | 963 | LOGSYNC_LOCK(log, flags); |
959 | if (list_empty(&log->synclist)) | 964 | if (list_empty(&log->synclist)) |
960 | log->sync = log->lsn; | 965 | log->sync = log->lsn; |
961 | else { | 966 | else { |
962 | lp = list_entry(log->synclist.next, | 967 | lp = list_entry(log->synclist.next, |
963 | struct logsyncblk, synclist); | 968 | struct logsyncblk, synclist); |
964 | log->sync = lp->lsn; | 969 | log->sync = lp->lsn; |
965 | } | 970 | } |
966 | LOGSYNC_UNLOCK(log, flags); | 971 | LOGSYNC_UNLOCK(log, flags); |
967 | 972 | ||
968 | } | 973 | } |
969 | 974 | ||
970 | /* if sync is different from last syncpt, | 975 | /* if sync is different from last syncpt, |
971 | * write a SYNCPT record with syncpt = sync. | 976 | * write a SYNCPT record with syncpt = sync. |
972 | * reset syncpt = sync | 977 | * reset syncpt = sync |
973 | */ | 978 | */ |
974 | if (log->sync != log->syncpt) { | 979 | if (log->sync != log->syncpt) { |
975 | lrd.logtid = 0; | 980 | lrd.logtid = 0; |
976 | lrd.backchain = 0; | 981 | lrd.backchain = 0; |
977 | lrd.type = cpu_to_le16(LOG_SYNCPT); | 982 | lrd.type = cpu_to_le16(LOG_SYNCPT); |
978 | lrd.length = 0; | 983 | lrd.length = 0; |
979 | lrd.log.syncpt.sync = cpu_to_le32(log->sync); | 984 | lrd.log.syncpt.sync = cpu_to_le32(log->sync); |
980 | lsn = lmWriteRecord(log, NULL, &lrd, NULL); | 985 | lsn = lmWriteRecord(log, NULL, &lrd, NULL); |
981 | 986 | ||
982 | log->syncpt = log->sync; | 987 | log->syncpt = log->sync; |
983 | } else | 988 | } else |
984 | lsn = log->lsn; | 989 | lsn = log->lsn; |
985 | 990 | ||
986 | /* | 991 | /* |
987 | * setup next syncpt trigger (SWAG) | 992 | * setup next syncpt trigger (SWAG) |
988 | */ | 993 | */ |
989 | logsize = log->logsize; | 994 | logsize = log->logsize; |
990 | 995 | ||
991 | logdiff(written, lsn, log); | 996 | logdiff(written, lsn, log); |
992 | free = logsize - written; | 997 | free = logsize - written; |
993 | delta = LOGSYNC_DELTA(logsize); | 998 | delta = LOGSYNC_DELTA(logsize); |
994 | more = min(free / 2, delta); | 999 | more = min(free / 2, delta); |
995 | if (more < 2 * LOGPSIZE) { | 1000 | if (more < 2 * LOGPSIZE) { |
996 | jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n"); | 1001 | jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n"); |
997 | /* | 1002 | /* |
998 | * log wrapping | 1003 | * log wrapping |
999 | * | 1004 | * |
1000 | * option 1 - panic ? No.! | 1005 | * option 1 - panic ? No.! |
1001 | * option 2 - shutdown file systems | 1006 | * option 2 - shutdown file systems |
1002 | * associated with log ? | 1007 | * associated with log ? |
1003 | * option 3 - extend log ? | 1008 | * option 3 - extend log ? |
1004 | */ | 1009 | */ |
1005 | /* | 1010 | /* |
1006 | * option 4 - second chance | 1011 | * option 4 - second chance |
1007 | * | 1012 | * |
1008 | * mark log wrapped, and continue. | 1013 | * mark log wrapped, and continue. |
1009 | * when all active transactions are completed, | 1014 | * when all active transactions are completed, |
1010 | * mark log valid for recovery. | 1015 | * mark log valid for recovery. |
1011 | * if crashed during invalid state, log state | 1016 | * if crashed during invalid state, log state |
1012 | * implies invalid log, forcing fsck(). | 1017 | * implies invalid log, forcing fsck(). |
1013 | */ | 1018 | */ |
1014 | /* mark log state log wrap in log superblock */ | 1019 | /* mark log state log wrap in log superblock */ |
1015 | /* log->state = LOGWRAP; */ | 1020 | /* log->state = LOGWRAP; */ |
1016 | 1021 | ||
1017 | /* reset sync point computation */ | 1022 | /* reset sync point computation */ |
1018 | log->syncpt = log->sync = lsn; | 1023 | log->syncpt = log->sync = lsn; |
1019 | log->nextsync = delta; | 1024 | log->nextsync = delta; |
1020 | } else | 1025 | } else |
1021 | /* next syncpt trigger = written + more */ | 1026 | /* next syncpt trigger = written + more */ |
1022 | log->nextsync = written + more; | 1027 | log->nextsync = written + more; |
1023 | 1028 | ||
1024 | /* return if lmLogSync() from outside of transaction, e.g., sync() */ | ||
1025 | if (nosyncwait) | ||
1026 | return lsn; | ||
1027 | |||
1028 | /* if number of bytes written from last sync point is more | 1029 | /* if number of bytes written from last sync point is more |
1029 | * than 1/4 of the log size, stop new transactions from | 1030 | * than 1/4 of the log size, stop new transactions from |
1030 | * starting until all current transactions are completed | 1031 | * starting until all current transactions are completed |
1031 | * by setting syncbarrier flag. | 1032 | * by setting syncbarrier flag. |
1032 | */ | 1033 | */ |
1033 | if (!test_bit(log_SYNCBARRIER, &log->flag) && | 1034 | if (!test_bit(log_SYNCBARRIER, &log->flag) && |
1034 | (written > LOGSYNC_BARRIER(logsize)) && log->active) { | 1035 | (written > LOGSYNC_BARRIER(logsize)) && log->active) { |
1035 | set_bit(log_SYNCBARRIER, &log->flag); | 1036 | set_bit(log_SYNCBARRIER, &log->flag); |
1036 | jfs_info("log barrier on: lsn=0x%x syncpt=0x%x", lsn, | 1037 | jfs_info("log barrier on: lsn=0x%x syncpt=0x%x", lsn, |
1037 | log->syncpt); | 1038 | log->syncpt); |
1038 | /* | 1039 | /* |
1039 | * We may have to initiate group commit | 1040 | * We may have to initiate group commit |
1040 | */ | 1041 | */ |
1041 | jfs_flush_journal(log, 0); | 1042 | jfs_flush_journal(log, 0); |
1042 | } | 1043 | } |
1043 | 1044 | ||
1044 | return lsn; | 1045 | return lsn; |
1045 | } | 1046 | } |
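Two things are worth noting in the new lmLogSync(). First, hard_sync selects filemap_fdatawrite() (WB_SYNC_ALL writeback) instead of filemap_flush() (WB_SYNC_NONE), so during sync barrier processing every dirty metapage is queued for I/O rather than only the easily flushed ones. Second, with the old nosyncwait early return gone, the trigger arithmetic and the barrier check now run on every call. A sketch of the trigger arithmetic, with an assumed value standing in for LOGSYNC_DELTA():

#include <stdio.h>

#define LOGPSIZE 4096			/* JFS log page size */

int main(void)
{
	int logsize = 1022 * LOGPSIZE;	/* usable log bytes */
	int written = 3 * LOGPSIZE;	/* bytes since last sync point */
	int delta   = logsize / 8;	/* assumed stand-in for LOGSYNC_DELTA() */
	int space   = logsize - written;
	int more    = (space / 2 < delta) ? space / 2 : delta;

	if (more < 2 * LOGPSIZE)	/* log wrap: reset and start over */
		printf("log wrap, nextsync = %d\n", delta);
	else				/* normal case */
		printf("nextsync = written + more = %d\n", written + more);
	return 0;
}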
1046 | 1047 | ||
1047 | /* | 1048 | /* |
1048 | * NAME: jfs_syncpt | 1049 | * NAME: jfs_syncpt |
1049 | * | 1050 | * |
1050 | * FUNCTION: write log SYNCPT record for specified log | 1051 | * FUNCTION: write log SYNCPT record for specified log |
1051 | * | 1052 | * |
1052 | * PARAMETERS: log - log structure | 1053 | * PARAMETERS: log - log structure |
1054 | * hard_sync - set to 1 to force metadata to be written | ||
1053 | */ | 1055 | */ |
1054 | void jfs_syncpt(struct jfs_log *log) | 1056 | void jfs_syncpt(struct jfs_log *log, int hard_sync) |
1055 | { LOG_LOCK(log); | 1057 | { LOG_LOCK(log); |
1056 | lmLogSync(log, 1); | 1058 | lmLogSync(log, hard_sync); |
1057 | LOG_UNLOCK(log); | 1059 | LOG_UNLOCK(log); |
1058 | } | 1060 | } |
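Callers now choose the flavor of sync at each call site: hard_sync = 0 keeps the old lazy behaviour, while hard_sync = 1 is meant for the barrier case described in the commit message, when all outstanding transactions are on disk and no new ones have been admitted. A compilable userspace stand-in (the type and the stub body are invented so the example builds outside the kernel):

#include <stdio.h>

struct jfs_log { int lsn; };	/* invented stand-in type */

static void jfs_syncpt_model(struct jfs_log *log, int hard_sync)
{
	/* real code: LOG_LOCK(log); lmLogSync(log, hard_sync); LOG_UNLOCK(log); */
	printf("syncpt(%s) at lsn 0x%x\n",
	       hard_sync ? "hard" : "lazy", log->lsn);
}

int main(void)
{
	struct jfs_log log = { .lsn = 0x2000 };

	jfs_syncpt_model(&log, 0);	/* background sync: flush what's easy */
	jfs_syncpt_model(&log, 1);	/* barrier case: force all metadata out */
	return 0;
}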
1059 | 1061 | ||
1060 | /* | 1062 | /* |
1061 | * NAME: lmLogOpen() | 1063 | * NAME: lmLogOpen() |
1062 | * | 1064 | * |
1063 | * FUNCTION: open the log on first open; | 1065 | * FUNCTION: open the log on first open; |
1064 | * insert filesystem in the active list of the log. | 1066 | * insert filesystem in the active list of the log. |
1065 | * | 1067 | * |
1066 | * PARAMETER: ipmnt - file system mount inode | 1068 | * PARAMETER: ipmnt - file system mount inode |
1067 | * iplog - log inode (out) | 1069 | * iplog - log inode (out) |
1068 | * | 1070 | * |
1069 | * RETURN: | 1071 | * RETURN: |
1070 | * | 1072 | * |
1071 | * serialization: | 1073 | * serialization: |
1072 | */ | 1074 | */ |
1073 | int lmLogOpen(struct super_block *sb) | 1075 | int lmLogOpen(struct super_block *sb) |
1074 | { | 1076 | { |
1075 | int rc; | 1077 | int rc; |
1076 | struct block_device *bdev; | 1078 | struct block_device *bdev; |
1077 | struct jfs_log *log; | 1079 | struct jfs_log *log; |
1078 | struct jfs_sb_info *sbi = JFS_SBI(sb); | 1080 | struct jfs_sb_info *sbi = JFS_SBI(sb); |
1079 | 1081 | ||
1080 | if (sbi->flag & JFS_NOINTEGRITY) | 1082 | if (sbi->flag & JFS_NOINTEGRITY) |
1081 | return open_dummy_log(sb); | 1083 | return open_dummy_log(sb); |
1082 | 1084 | ||
1083 | if (sbi->mntflag & JFS_INLINELOG) | 1085 | if (sbi->mntflag & JFS_INLINELOG) |
1084 | return open_inline_log(sb); | 1086 | return open_inline_log(sb); |
1085 | 1087 | ||
1086 | down(&jfs_log_sem); | 1088 | down(&jfs_log_sem); |
1087 | list_for_each_entry(log, &jfs_external_logs, journal_list) { | 1089 | list_for_each_entry(log, &jfs_external_logs, journal_list) { |
1088 | if (log->bdev->bd_dev == sbi->logdev) { | 1090 | if (log->bdev->bd_dev == sbi->logdev) { |
1089 | if (memcmp(log->uuid, sbi->loguuid, | 1091 | if (memcmp(log->uuid, sbi->loguuid, |
1090 | sizeof(log->uuid))) { | 1092 | sizeof(log->uuid))) { |
1091 | jfs_warn("wrong uuid on JFS journal\n"); | 1093 | jfs_warn("wrong uuid on JFS journal\n"); |
1092 | up(&jfs_log_sem); | 1094 | up(&jfs_log_sem); |
1093 | return -EINVAL; | 1095 | return -EINVAL; |
1094 | } | 1096 | } |
1095 | /* | 1097 | /* |
1096 | * add file system to log active file system list | 1098 | * add file system to log active file system list |
1097 | */ | 1099 | */ |
1098 | if ((rc = lmLogFileSystem(log, sbi, 1))) { | 1100 | if ((rc = lmLogFileSystem(log, sbi, 1))) { |
1099 | up(&jfs_log_sem); | 1101 | up(&jfs_log_sem); |
1100 | return rc; | 1102 | return rc; |
1101 | } | 1103 | } |
1102 | goto journal_found; | 1104 | goto journal_found; |
1103 | } | 1105 | } |
1104 | } | 1106 | } |
1105 | 1107 | ||
1106 | if (!(log = kmalloc(sizeof(struct jfs_log), GFP_KERNEL))) { | 1108 | if (!(log = kmalloc(sizeof(struct jfs_log), GFP_KERNEL))) { |
1107 | up(&jfs_log_sem); | 1109 | up(&jfs_log_sem); |
1108 | return -ENOMEM; | 1110 | return -ENOMEM; |
1109 | } | 1111 | } |
1110 | memset(log, 0, sizeof(struct jfs_log)); | 1112 | memset(log, 0, sizeof(struct jfs_log)); |
1111 | INIT_LIST_HEAD(&log->sb_list); | 1113 | INIT_LIST_HEAD(&log->sb_list); |
1112 | init_waitqueue_head(&log->syncwait); | 1114 | init_waitqueue_head(&log->syncwait); |
1113 | 1115 | ||
1114 | /* | 1116 | /* |
1115 | * external log as separate logical volume | 1117 | * external log as separate logical volume |
1116 | * | 1118 | * |
1117 | * file systems to log may have n-to-1 relationship; | 1119 | * file systems to log may have n-to-1 relationship; |
1118 | */ | 1120 | */ |
1119 | 1121 | ||
1120 | bdev = open_by_devnum(sbi->logdev, FMODE_READ|FMODE_WRITE); | 1122 | bdev = open_by_devnum(sbi->logdev, FMODE_READ|FMODE_WRITE); |
1121 | if (IS_ERR(bdev)) { | 1123 | if (IS_ERR(bdev)) { |
1122 | rc = PTR_ERR(bdev); | 1124 | rc = PTR_ERR(bdev); |
1123 | goto free; | 1125 | goto free; |
1124 | } | 1126 | } |
1125 | 1127 | ||
1126 | if ((rc = bd_claim(bdev, log))) { | 1128 | if ((rc = bd_claim(bdev, log))) { |
1127 | goto close; | 1129 | goto close; |
1128 | } | 1130 | } |
1129 | 1131 | ||
1130 | log->bdev = bdev; | 1132 | log->bdev = bdev; |
1131 | memcpy(log->uuid, sbi->loguuid, sizeof(log->uuid)); | 1133 | memcpy(log->uuid, sbi->loguuid, sizeof(log->uuid)); |
1132 | 1134 | ||
1133 | /* | 1135 | /* |
1134 | * initialize log: | 1136 | * initialize log: |
1135 | */ | 1137 | */ |
1136 | if ((rc = lmLogInit(log))) | 1138 | if ((rc = lmLogInit(log))) |
1137 | goto unclaim; | 1139 | goto unclaim; |
1138 | 1140 | ||
1139 | list_add(&log->journal_list, &jfs_external_logs); | 1141 | list_add(&log->journal_list, &jfs_external_logs); |
1140 | 1142 | ||
1141 | /* | 1143 | /* |
1142 | * add file system to log active file system list | 1144 | * add file system to log active file system list |
1143 | */ | 1145 | */ |
1144 | if ((rc = lmLogFileSystem(log, sbi, 1))) | 1146 | if ((rc = lmLogFileSystem(log, sbi, 1))) |
1145 | goto shutdown; | 1147 | goto shutdown; |
1146 | 1148 | ||
1147 | journal_found: | 1149 | journal_found: |
1148 | LOG_LOCK(log); | 1150 | LOG_LOCK(log); |
1149 | list_add(&sbi->log_list, &log->sb_list); | 1151 | list_add(&sbi->log_list, &log->sb_list); |
1150 | sbi->log = log; | 1152 | sbi->log = log; |
1151 | LOG_UNLOCK(log); | 1153 | LOG_UNLOCK(log); |
1152 | 1154 | ||
1153 | up(&jfs_log_sem); | 1155 | up(&jfs_log_sem); |
1154 | return 0; | 1156 | return 0; |
1155 | 1157 | ||
1156 | /* | 1158 | /* |
1157 | * unwind on error | 1159 | * unwind on error |
1158 | */ | 1160 | */ |
1159 | shutdown: /* unwind lbmLogInit() */ | 1161 | shutdown: /* unwind lbmLogInit() */ |
1160 | list_del(&log->journal_list); | 1162 | list_del(&log->journal_list); |
1161 | lbmLogShutdown(log); | 1163 | lbmLogShutdown(log); |
1162 | 1164 | ||
1163 | unclaim: | 1165 | unclaim: |
1164 | bd_release(bdev); | 1166 | bd_release(bdev); |
1165 | 1167 | ||
1166 | close: /* close external log device */ | 1168 | close: /* close external log device */ |
1167 | blkdev_put(bdev); | 1169 | blkdev_put(bdev); |
1168 | 1170 | ||
1169 | free: /* free log descriptor */ | 1171 | free: /* free log descriptor */ |
1170 | up(&jfs_log_sem); | 1172 | up(&jfs_log_sem); |
1171 | kfree(log); | 1173 | kfree(log); |
1172 | 1174 | ||
1173 | jfs_warn("lmLogOpen: exit(%d)", rc); | 1175 | jfs_warn("lmLogOpen: exit(%d)", rc); |
1174 | return rc; | 1176 | return rc; |
1175 | } | 1177 | } |
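lmLogOpen() uses the usual kernel unwind idiom: each acquisition that succeeds gains a label that releases it, and a failure jumps to the label for the last successful step, so resources are released in reverse order of acquisition. A generic sketch of the pattern (resource names invented):

#include <stdio.h>

static int acquire_a(void) { return 0; }
static int acquire_b(void) { return -1; }	/* force a failure */
static void release_a(void) { puts("release a"); }

static int open_with_unwind(void)
{
	int rc;

	if ((rc = acquire_a()))
		goto out;		/* nothing to undo yet */
	if ((rc = acquire_b()))
		goto undo_a;		/* undo step a, then fail */

	return 0;			/* success: keep everything */

undo_a:
	release_a();
out:
	return rc;
}

int main(void)
{
	printf("rc = %d\n", open_with_unwind());
	return 0;
}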
1176 | 1178 | ||
1177 | static int open_inline_log(struct super_block *sb) | 1179 | static int open_inline_log(struct super_block *sb) |
1178 | { | 1180 | { |
1179 | struct jfs_log *log; | 1181 | struct jfs_log *log; |
1180 | int rc; | 1182 | int rc; |
1181 | 1183 | ||
1182 | if (!(log = kmalloc(sizeof(struct jfs_log), GFP_KERNEL))) | 1184 | if (!(log = kmalloc(sizeof(struct jfs_log), GFP_KERNEL))) |
1183 | return -ENOMEM; | 1185 | return -ENOMEM; |
1184 | memset(log, 0, sizeof(struct jfs_log)); | 1186 | memset(log, 0, sizeof(struct jfs_log)); |
1185 | INIT_LIST_HEAD(&log->sb_list); | 1187 | INIT_LIST_HEAD(&log->sb_list); |
1186 | init_waitqueue_head(&log->syncwait); | 1188 | init_waitqueue_head(&log->syncwait); |
1187 | 1189 | ||
1188 | set_bit(log_INLINELOG, &log->flag); | 1190 | set_bit(log_INLINELOG, &log->flag); |
1189 | log->bdev = sb->s_bdev; | 1191 | log->bdev = sb->s_bdev; |
1190 | log->base = addressPXD(&JFS_SBI(sb)->logpxd); | 1192 | log->base = addressPXD(&JFS_SBI(sb)->logpxd); |
1191 | log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >> | 1193 | log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >> |
1192 | (L2LOGPSIZE - sb->s_blocksize_bits); | 1194 | (L2LOGPSIZE - sb->s_blocksize_bits); |
1193 | log->l2bsize = sb->s_blocksize_bits; | 1195 | log->l2bsize = sb->s_blocksize_bits; |
1194 | ASSERT(L2LOGPSIZE >= sb->s_blocksize_bits); | 1196 | ASSERT(L2LOGPSIZE >= sb->s_blocksize_bits); |
1195 | 1197 | ||
1196 | /* | 1198 | /* |
1197 | * initialize log. | 1199 | * initialize log. |
1198 | */ | 1200 | */ |
1199 | if ((rc = lmLogInit(log))) { | 1201 | if ((rc = lmLogInit(log))) { |
1200 | kfree(log); | 1202 | kfree(log); |
1201 | jfs_warn("lmLogOpen: exit(%d)", rc); | 1203 | jfs_warn("lmLogOpen: exit(%d)", rc); |
1202 | return rc; | 1204 | return rc; |
1203 | } | 1205 | } |
1204 | 1206 | ||
1205 | list_add(&JFS_SBI(sb)->log_list, &log->sb_list); | 1207 | list_add(&JFS_SBI(sb)->log_list, &log->sb_list); |
1206 | JFS_SBI(sb)->log = log; | 1208 | JFS_SBI(sb)->log = log; |
1207 | 1209 | ||
1208 | return rc; | 1210 | return rc; |
1209 | } | 1211 | } |
1210 | 1212 | ||
1211 | static int open_dummy_log(struct super_block *sb) | 1213 | static int open_dummy_log(struct super_block *sb) |
1212 | { | 1214 | { |
1213 | int rc; | 1215 | int rc; |
1214 | 1216 | ||
1215 | down(&jfs_log_sem); | 1217 | down(&jfs_log_sem); |
1216 | if (!dummy_log) { | 1218 | if (!dummy_log) { |
1217 | dummy_log = kmalloc(sizeof(struct jfs_log), GFP_KERNEL); | 1219 | dummy_log = kmalloc(sizeof(struct jfs_log), GFP_KERNEL); |
1218 | if (!dummy_log) { | 1220 | if (!dummy_log) { |
1219 | up(&jfs_log_sem); | 1221 | up(&jfs_log_sem); |
1220 | return -ENOMEM; | 1222 | return -ENOMEM; |
1221 | } | 1223 | } |
1222 | memset(dummy_log, 0, sizeof(struct jfs_log)); | 1224 | memset(dummy_log, 0, sizeof(struct jfs_log)); |
1223 | INIT_LIST_HEAD(&dummy_log->sb_list); | 1225 | INIT_LIST_HEAD(&dummy_log->sb_list); |
1224 | init_waitqueue_head(&dummy_log->syncwait); | 1226 | init_waitqueue_head(&dummy_log->syncwait); |
1225 | dummy_log->no_integrity = 1; | 1227 | dummy_log->no_integrity = 1; |
1226 | /* Make up some stuff */ | 1228 | /* Make up some stuff */ |
1227 | dummy_log->base = 0; | 1229 | dummy_log->base = 0; |
1228 | dummy_log->size = 1024; | 1230 | dummy_log->size = 1024; |
1229 | rc = lmLogInit(dummy_log); | 1231 | rc = lmLogInit(dummy_log); |
1230 | if (rc) { | 1232 | if (rc) { |
1231 | kfree(dummy_log); | 1233 | kfree(dummy_log); |
1232 | dummy_log = NULL; | 1234 | dummy_log = NULL; |
1233 | up(&jfs_log_sem); | 1235 | up(&jfs_log_sem); |
1234 | return rc; | 1236 | return rc; |
1235 | } | 1237 | } |
1236 | } | 1238 | } |
1237 | 1239 | ||
1238 | LOG_LOCK(dummy_log); | 1240 | LOG_LOCK(dummy_log); |
1239 | list_add(&JFS_SBI(sb)->log_list, &dummy_log->sb_list); | 1241 | list_add(&JFS_SBI(sb)->log_list, &dummy_log->sb_list); |
1240 | JFS_SBI(sb)->log = dummy_log; | 1242 | JFS_SBI(sb)->log = dummy_log; |
1241 | LOG_UNLOCK(dummy_log); | 1243 | LOG_UNLOCK(dummy_log); |
1242 | up(&jfs_log_sem); | 1244 | up(&jfs_log_sem); |
1243 | 1245 | ||
1244 | return 0; | 1246 | return 0; |
1245 | } | 1247 | } |
1246 | 1248 | ||
1247 | /* | 1249 | /* |
1248 | * NAME: lmLogInit() | 1250 | * NAME: lmLogInit() |
1249 | * | 1251 | * |
1250 | * FUNCTION: log initialization at first log open. | 1252 | * FUNCTION: log initialization at first log open. |
1251 | * | 1253 | * |
1252 | * logredo() (or logformat()) should have been run previously. | 1254 | * logredo() (or logformat()) should have been run previously. |
1253 | * initialize the log from log superblock. | 1255 | * initialize the log from log superblock. |
1254 | * set the log state in the superblock to LOGMOUNT and | 1256 | * set the log state in the superblock to LOGMOUNT and |
1255 | * write SYNCPT log record. | 1257 | * write SYNCPT log record. |
1256 | * | 1258 | * |
1257 | * PARAMETER: log - log structure | 1259 | * PARAMETER: log - log structure |
1258 | * | 1260 | * |
1259 | * RETURN: 0 - if ok | 1261 | * RETURN: 0 - if ok |
1260 | * -EINVAL - bad log magic number or superblock dirty | 1262 | * -EINVAL - bad log magic number or superblock dirty |
1261 | * error returned from logwait() | 1263 | * error returned from logwait() |
1262 | * | 1264 | * |
1263 | * serialization: single first open thread | 1265 | * serialization: single first open thread |
1264 | */ | 1266 | */ |
1265 | int lmLogInit(struct jfs_log * log) | 1267 | int lmLogInit(struct jfs_log * log) |
1266 | { | 1268 | { |
1267 | int rc = 0; | 1269 | int rc = 0; |
1268 | struct lrd lrd; | 1270 | struct lrd lrd; |
1269 | struct logsuper *logsuper; | 1271 | struct logsuper *logsuper; |
1270 | struct lbuf *bpsuper; | 1272 | struct lbuf *bpsuper; |
1271 | struct lbuf *bp; | 1273 | struct lbuf *bp; |
1272 | struct logpage *lp; | 1274 | struct logpage *lp; |
1273 | int lsn = 0; | 1275 | int lsn = 0; |
1274 | 1276 | ||
1275 | jfs_info("lmLogInit: log:0x%p", log); | 1277 | jfs_info("lmLogInit: log:0x%p", log); |
1276 | 1278 | ||
1277 | /* initialize the group commit serialization lock */ | 1279 | /* initialize the group commit serialization lock */ |
1278 | LOGGC_LOCK_INIT(log); | 1280 | LOGGC_LOCK_INIT(log); |
1279 | 1281 | ||
1280 | /* allocate/initialize the log write serialization lock */ | 1282 | /* allocate/initialize the log write serialization lock */ |
1281 | LOG_LOCK_INIT(log); | 1283 | LOG_LOCK_INIT(log); |
1282 | 1284 | ||
1283 | LOGSYNC_LOCK_INIT(log); | 1285 | LOGSYNC_LOCK_INIT(log); |
1284 | 1286 | ||
1285 | INIT_LIST_HEAD(&log->synclist); | 1287 | INIT_LIST_HEAD(&log->synclist); |
1286 | 1288 | ||
1287 | INIT_LIST_HEAD(&log->cqueue); | 1289 | INIT_LIST_HEAD(&log->cqueue); |
1288 | log->flush_tblk = NULL; | 1290 | log->flush_tblk = NULL; |
1289 | 1291 | ||
1290 | log->count = 0; | 1292 | log->count = 0; |
1291 | 1293 | ||
1292 | /* | 1294 | /* |
1293 | * initialize log i/o | 1295 | * initialize log i/o |
1294 | */ | 1296 | */ |
1295 | if ((rc = lbmLogInit(log))) | 1297 | if ((rc = lbmLogInit(log))) |
1296 | return rc; | 1298 | return rc; |
1297 | 1299 | ||
1298 | if (!test_bit(log_INLINELOG, &log->flag)) | 1300 | if (!test_bit(log_INLINELOG, &log->flag)) |
1299 | log->l2bsize = L2LOGPSIZE; | 1301 | log->l2bsize = L2LOGPSIZE; |
1300 | 1302 | ||
1301 | /* check for disabled journaling to disk */ | 1303 | /* check for disabled journaling to disk */ |
1302 | if (log->no_integrity) { | 1304 | if (log->no_integrity) { |
1303 | /* | 1305 | /* |
1304 | * Journal pages will still be filled. When the time comes | 1306 | * Journal pages will still be filled. When the time comes |
1305 | * to actually do the I/O, the write is not done, and the | 1307 | * to actually do the I/O, the write is not done, and the |
1306 | * endio routine is called directly. | 1308 | * endio routine is called directly. |
1307 | */ | 1309 | */ |
1308 | bp = lbmAllocate(log , 0); | 1310 | bp = lbmAllocate(log , 0); |
1309 | log->bp = bp; | 1311 | log->bp = bp; |
1310 | bp->l_pn = bp->l_eor = 0; | 1312 | bp->l_pn = bp->l_eor = 0; |
1311 | } else { | 1313 | } else { |
1312 | /* | 1314 | /* |
1313 | * validate log superblock | 1315 | * validate log superblock |
1314 | */ | 1316 | */ |
1315 | if ((rc = lbmRead(log, 1, &bpsuper))) | 1317 | if ((rc = lbmRead(log, 1, &bpsuper))) |
1316 | goto errout10; | 1318 | goto errout10; |
1317 | 1319 | ||
1318 | logsuper = (struct logsuper *) bpsuper->l_ldata; | 1320 | logsuper = (struct logsuper *) bpsuper->l_ldata; |
1319 | 1321 | ||
1320 | if (logsuper->magic != cpu_to_le32(LOGMAGIC)) { | 1322 | if (logsuper->magic != cpu_to_le32(LOGMAGIC)) { |
1321 | jfs_warn("*** Log Format Error ! ***"); | 1323 | jfs_warn("*** Log Format Error ! ***"); |
1322 | rc = -EINVAL; | 1324 | rc = -EINVAL; |
1323 | goto errout20; | 1325 | goto errout20; |
1324 | } | 1326 | } |
1325 | 1327 | ||
1326 | /* logredo() should have been run successfully. */ | 1328 | /* logredo() should have been run successfully. */ |
1327 | if (logsuper->state != cpu_to_le32(LOGREDONE)) { | 1329 | if (logsuper->state != cpu_to_le32(LOGREDONE)) { |
1328 | jfs_warn("*** Log Is Dirty ! ***"); | 1330 | jfs_warn("*** Log Is Dirty ! ***"); |
1329 | rc = -EINVAL; | 1331 | rc = -EINVAL; |
1330 | goto errout20; | 1332 | goto errout20; |
1331 | } | 1333 | } |
1332 | 1334 | ||
1333 | /* initialize log from log superblock */ | 1335 | /* initialize log from log superblock */ |
1334 | if (test_bit(log_INLINELOG,&log->flag)) { | 1336 | if (test_bit(log_INLINELOG,&log->flag)) { |
1335 | if (log->size != le32_to_cpu(logsuper->size)) { | 1337 | if (log->size != le32_to_cpu(logsuper->size)) { |
1336 | rc = -EINVAL; | 1338 | rc = -EINVAL; |
1337 | goto errout20; | 1339 | goto errout20; |
1338 | } | 1340 | } |
1339 | jfs_info("lmLogInit: inline log:0x%p base:0x%Lx " | 1341 | jfs_info("lmLogInit: inline log:0x%p base:0x%Lx " |
1340 | "size:0x%x", log, | 1342 | "size:0x%x", log, |
1341 | (unsigned long long) log->base, log->size); | 1343 | (unsigned long long) log->base, log->size); |
1342 | } else { | 1344 | } else { |
1343 | if (memcmp(logsuper->uuid, log->uuid, 16)) { | 1345 | if (memcmp(logsuper->uuid, log->uuid, 16)) { |
1344 | jfs_warn("wrong uuid on JFS log device"); | 1346 | jfs_warn("wrong uuid on JFS log device"); |
1345 | goto errout20; | 1347 | goto errout20; |
1346 | } | 1348 | } |
1347 | log->size = le32_to_cpu(logsuper->size); | 1349 | log->size = le32_to_cpu(logsuper->size); |
1348 | log->l2bsize = le32_to_cpu(logsuper->l2bsize); | 1350 | log->l2bsize = le32_to_cpu(logsuper->l2bsize); |
1349 | jfs_info("lmLogInit: external log:0x%p base:0x%Lx " | 1351 | jfs_info("lmLogInit: external log:0x%p base:0x%Lx " |
1350 | "size:0x%x", log, | 1352 | "size:0x%x", log, |
1351 | (unsigned long long) log->base, log->size); | 1353 | (unsigned long long) log->base, log->size); |
1352 | } | 1354 | } |
1353 | 1355 | ||
1354 | log->page = le32_to_cpu(logsuper->end) / LOGPSIZE; | 1356 | log->page = le32_to_cpu(logsuper->end) / LOGPSIZE; |
1355 | log->eor = le32_to_cpu(logsuper->end) - (LOGPSIZE * log->page); | 1357 | log->eor = le32_to_cpu(logsuper->end) - (LOGPSIZE * log->page); |
1356 | 1358 | ||
1357 | /* | 1359 | /* |
1358 | * initialize for log append write mode | 1360 | * initialize for log append write mode |
1359 | */ | 1361 | */ |
1360 | /* establish current/end-of-log page/buffer */ | 1362 | /* establish current/end-of-log page/buffer */ |
1361 | if ((rc = lbmRead(log, log->page, &bp))) | 1363 | if ((rc = lbmRead(log, log->page, &bp))) |
1362 | goto errout20; | 1364 | goto errout20; |
1363 | 1365 | ||
1364 | lp = (struct logpage *) bp->l_ldata; | 1366 | lp = (struct logpage *) bp->l_ldata; |
1365 | 1367 | ||
1366 | jfs_info("lmLogInit: lsn:0x%x page:%d eor:%d:%d", | 1368 | jfs_info("lmLogInit: lsn:0x%x page:%d eor:%d:%d", |
1367 | le32_to_cpu(logsuper->end), log->page, log->eor, | 1369 | le32_to_cpu(logsuper->end), log->page, log->eor, |
1368 | le16_to_cpu(lp->h.eor)); | 1370 | le16_to_cpu(lp->h.eor)); |
1369 | 1371 | ||
1370 | log->bp = bp; | 1372 | log->bp = bp; |
1371 | bp->l_pn = log->page; | 1373 | bp->l_pn = log->page; |
1372 | bp->l_eor = log->eor; | 1374 | bp->l_eor = log->eor; |
1373 | 1375 | ||
1374 | /* if current page is full, move on to next page */ | 1376 | /* if current page is full, move on to next page */ |
1375 | if (log->eor >= LOGPSIZE - LOGPTLRSIZE) | 1377 | if (log->eor >= LOGPSIZE - LOGPTLRSIZE) |
1376 | lmNextPage(log); | 1378 | lmNextPage(log); |
1377 | 1379 | ||
1378 | /* | 1380 | /* |
1379 | * initialize log syncpoint | 1381 | * initialize log syncpoint |
1380 | */ | 1382 | */ |
1381 | /* | 1383 | /* |
1382 | * write the first SYNCPT record with syncpoint = 0 | 1384 | * write the first SYNCPT record with syncpoint = 0 |
1383 | * (i.e., log redo up to HERE !); | 1385 | * (i.e., log redo up to HERE !); |
1384 | * remove current page from lbm write queue at end of pageout | 1386 | * remove current page from lbm write queue at end of pageout |
1385 | * (to write log superblock update), but do not release to | 1387 | * (to write log superblock update), but do not release to |
1386 | * freelist; | 1388 | * freelist; |
1387 | */ | 1389 | */ |
1388 | lrd.logtid = 0; | 1390 | lrd.logtid = 0; |
1389 | lrd.backchain = 0; | 1391 | lrd.backchain = 0; |
1390 | lrd.type = cpu_to_le16(LOG_SYNCPT); | 1392 | lrd.type = cpu_to_le16(LOG_SYNCPT); |
1391 | lrd.length = 0; | 1393 | lrd.length = 0; |
1392 | lrd.log.syncpt.sync = 0; | 1394 | lrd.log.syncpt.sync = 0; |
1393 | lsn = lmWriteRecord(log, NULL, &lrd, NULL); | 1395 | lsn = lmWriteRecord(log, NULL, &lrd, NULL); |
1394 | bp = log->bp; | 1396 | bp = log->bp; |
1395 | bp->l_ceor = bp->l_eor; | 1397 | bp->l_ceor = bp->l_eor; |
1396 | lp = (struct logpage *) bp->l_ldata; | 1398 | lp = (struct logpage *) bp->l_ldata; |
1397 | lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor); | 1399 | lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor); |
1398 | lbmWrite(log, bp, lbmWRITE | lbmSYNC, 0); | 1400 | lbmWrite(log, bp, lbmWRITE | lbmSYNC, 0); |
1399 | if ((rc = lbmIOWait(bp, 0))) | 1401 | if ((rc = lbmIOWait(bp, 0))) |
1400 | goto errout30; | 1402 | goto errout30; |
1401 | 1403 | ||
1402 | /* | 1404 | /* |
1403 | * update/write superblock | 1405 | * update/write superblock |
1404 | */ | 1406 | */ |
1405 | logsuper->state = cpu_to_le32(LOGMOUNT); | 1407 | logsuper->state = cpu_to_le32(LOGMOUNT); |
1406 | log->serial = le32_to_cpu(logsuper->serial) + 1; | 1408 | log->serial = le32_to_cpu(logsuper->serial) + 1; |
1407 | logsuper->serial = cpu_to_le32(log->serial); | 1409 | logsuper->serial = cpu_to_le32(log->serial); |
1408 | lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC); | 1410 | lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC); |
1409 | if ((rc = lbmIOWait(bpsuper, lbmFREE))) | 1411 | if ((rc = lbmIOWait(bpsuper, lbmFREE))) |
1410 | goto errout30; | 1412 | goto errout30; |
1411 | } | 1413 | } |
1412 | 1414 | ||
1413 | /* initialize logsync parameters */ | 1415 | /* initialize logsync parameters */ |
1414 | log->logsize = (log->size - 2) << L2LOGPSIZE; | 1416 | log->logsize = (log->size - 2) << L2LOGPSIZE; |
1415 | log->lsn = lsn; | 1417 | log->lsn = lsn; |
1416 | log->syncpt = lsn; | 1418 | log->syncpt = lsn; |
1417 | log->sync = log->syncpt; | 1419 | log->sync = log->syncpt; |
1418 | log->nextsync = LOGSYNC_DELTA(log->logsize); | 1420 | log->nextsync = LOGSYNC_DELTA(log->logsize); |
1419 | 1421 | ||
1420 | jfs_info("lmLogInit: lsn:0x%x syncpt:0x%x sync:0x%x", | 1422 | jfs_info("lmLogInit: lsn:0x%x syncpt:0x%x sync:0x%x", |
1421 | log->lsn, log->syncpt, log->sync); | 1423 | log->lsn, log->syncpt, log->sync); |
1422 | 1424 | ||
1423 | /* | 1425 | /* |
1424 | * initialize for lazy/group commit | 1426 | * initialize for lazy/group commit |
1425 | */ | 1427 | */ |
1426 | log->clsn = lsn; | 1428 | log->clsn = lsn; |
1427 | 1429 | ||
1428 | return 0; | 1430 | return 0; |
1429 | 1431 | ||
1430 | /* | 1432 | /* |
1431 | * unwind on error | 1433 | * unwind on error |
1432 | */ | 1434 | */ |
1433 | errout30: /* release log page */ | 1435 | errout30: /* release log page */ |
1434 | log->wqueue = NULL; | 1436 | log->wqueue = NULL; |
1435 | bp->l_wqnext = NULL; | 1437 | bp->l_wqnext = NULL; |
1436 | lbmFree(bp); | 1438 | lbmFree(bp); |
1437 | 1439 | ||
1438 | errout20: /* release log superblock */ | 1440 | errout20: /* release log superblock */ |
1439 | lbmFree(bpsuper); | 1441 | lbmFree(bpsuper); |
1440 | 1442 | ||
1441 | errout10: /* unwind lbmLogInit() */ | 1443 | errout10: /* unwind lbmLogInit() */ |
1442 | lbmLogShutdown(log); | 1444 | lbmLogShutdown(log); |
1443 | 1445 | ||
1444 | jfs_warn("lmLogInit: exit(%d)", rc); | 1446 | jfs_warn("lmLogInit: exit(%d)", rc); |
1445 | return rc; | 1447 | return rc; |
1446 | } | 1448 | } |
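The logsync initialization reserves two log pages (the superblock area) and expresses the remainder in bytes. A worked example, assuming the usual JFS value L2LOGPSIZE = 12 (4 KB log pages):

#include <stdio.h>

#define L2LOGPSIZE 12			/* assumed: log2 of the 4 KB log page */

int main(void)
{
	int size = 1024;		/* log->size: log length in pages */
	int logsize = (size - 2) << L2LOGPSIZE;

	printf("logsize = %d bytes\n", logsize);	/* 1022 pages in bytes */
	return 0;
}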
1447 | 1449 | ||
1448 | 1450 | ||
1449 | /* | 1451 | /* |
1450 | * NAME: lmLogClose() | 1452 | * NAME: lmLogClose() |
1451 | * | 1453 | * |
1452 | * FUNCTION: remove file system <ipmnt> from active list of log <iplog> | 1454 | * FUNCTION: remove file system <ipmnt> from active list of log <iplog> |
1453 | * and close it on last close. | 1455 | * and close it on last close. |
1454 | * | 1456 | * |
1455 | * PARAMETER: sb - superblock | 1457 | * PARAMETER: sb - superblock |
1456 | * | 1458 | * |
1457 | * RETURN: errors from subroutines | 1459 | * RETURN: errors from subroutines |
1458 | * | 1460 | * |
1459 | * serialization: | 1461 | * serialization: |
1460 | */ | 1462 | */ |
1461 | int lmLogClose(struct super_block *sb) | 1463 | int lmLogClose(struct super_block *sb) |
1462 | { | 1464 | { |
1463 | struct jfs_sb_info *sbi = JFS_SBI(sb); | 1465 | struct jfs_sb_info *sbi = JFS_SBI(sb); |
1464 | struct jfs_log *log = sbi->log; | 1466 | struct jfs_log *log = sbi->log; |
1465 | struct block_device *bdev; | 1467 | struct block_device *bdev; |
1466 | int rc = 0; | 1468 | int rc = 0; |
1467 | 1469 | ||
1468 | jfs_info("lmLogClose: log:0x%p", log); | 1470 | jfs_info("lmLogClose: log:0x%p", log); |
1469 | 1471 | ||
1470 | down(&jfs_log_sem); | 1472 | down(&jfs_log_sem); |
1471 | LOG_LOCK(log); | 1473 | LOG_LOCK(log); |
1472 | list_del(&sbi->log_list); | 1474 | list_del(&sbi->log_list); |
1473 | LOG_UNLOCK(log); | 1475 | LOG_UNLOCK(log); |
1474 | sbi->log = NULL; | 1476 | sbi->log = NULL; |
1475 | 1477 | ||
1476 | /* | 1478 | /* |
1477 | * We need to make sure all of the "written" metapages | 1479 | * We need to make sure all of the "written" metapages |
1478 | * actually make it to disk | 1480 | * actually make it to disk |
1479 | */ | 1481 | */ |
1480 | sync_blockdev(sb->s_bdev); | 1482 | sync_blockdev(sb->s_bdev); |
1481 | 1483 | ||
1482 | if (test_bit(log_INLINELOG, &log->flag)) { | 1484 | if (test_bit(log_INLINELOG, &log->flag)) { |
1483 | /* | 1485 | /* |
1484 | * in-line log in host file system | 1486 | * in-line log in host file system |
1485 | */ | 1487 | */ |
1486 | rc = lmLogShutdown(log); | 1488 | rc = lmLogShutdown(log); |
1487 | kfree(log); | 1489 | kfree(log); |
1488 | goto out; | 1490 | goto out; |
1489 | } | 1491 | } |
1490 | 1492 | ||
1491 | if (!log->no_integrity) | 1493 | if (!log->no_integrity) |
1492 | lmLogFileSystem(log, sbi, 0); | 1494 | lmLogFileSystem(log, sbi, 0); |
1493 | 1495 | ||
1494 | if (!list_empty(&log->sb_list)) | 1496 | if (!list_empty(&log->sb_list)) |
1495 | goto out; | 1497 | goto out; |
1496 | 1498 | ||
1497 | /* | 1499 | /* |
1498 | * TODO: ensure that the dummy_log is in a state to allow | 1500 | * TODO: ensure that the dummy_log is in a state to allow |
1499 | * lbmLogShutdown to deallocate all the buffers and call | 1501 | * lbmLogShutdown to deallocate all the buffers and call |
1500 | * kfree against dummy_log. For now, leave dummy_log & its | 1502 | * kfree against dummy_log. For now, leave dummy_log & its |
1501 | * buffers in memory, and reuse them if another no-integrity mount | 1503 | * buffers in memory, and reuse them if another no-integrity mount |
1502 | * is requested. | 1504 | * is requested. |
1503 | */ | 1505 | */ |
1504 | if (log->no_integrity) | 1506 | if (log->no_integrity) |
1505 | goto out; | 1507 | goto out; |
1506 | 1508 | ||
1507 | /* | 1509 | /* |
1508 | * external log as separate logical volume | 1510 | * external log as separate logical volume |
1509 | */ | 1511 | */ |
1510 | list_del(&log->journal_list); | 1512 | list_del(&log->journal_list); |
1511 | bdev = log->bdev; | 1513 | bdev = log->bdev; |
1512 | rc = lmLogShutdown(log); | 1514 | rc = lmLogShutdown(log); |
1513 | 1515 | ||
1514 | bd_release(bdev); | 1516 | bd_release(bdev); |
1515 | blkdev_put(bdev); | 1517 | blkdev_put(bdev); |
1516 | 1518 | ||
1517 | kfree(log); | 1519 | kfree(log); |
1518 | 1520 | ||
1519 | out: | 1521 | out: |
1520 | up(&jfs_log_sem); | 1522 | up(&jfs_log_sem); |
1521 | jfs_info("lmLogClose: exit(%d)", rc); | 1523 | jfs_info("lmLogClose: exit(%d)", rc); |
1522 | return rc; | 1524 | return rc; |
1523 | } | 1525 | } |
1524 | 1526 | ||
1525 | 1527 | ||
1526 | /* | 1528 | /* |
1527 | * NAME: jfs_flush_journal() | 1529 | * NAME: jfs_flush_journal() |
1528 | * | 1530 | * |
1529 | * FUNCTION: initiate write of any outstanding transactions to the journal | 1531 | * FUNCTION: initiate write of any outstanding transactions to the journal |
1530 | * and optionally wait until they are all written to disk | 1532 | * and optionally wait until they are all written to disk |
1531 | * | 1533 | * |
1532 | * wait == 0 flush until latest txn is committed, don't wait | 1534 | * wait == 0 flush until latest txn is committed, don't wait |
1533 | * wait == 1 flush until latest txn is committed, wait | 1535 | * wait == 1 flush until latest txn is committed, wait |
1534 | * wait > 1 flush until all txns are complete, wait | 1536 | * wait > 1 flush until all txns are complete, wait |
1535 | */ | 1537 | */ |
1536 | void jfs_flush_journal(struct jfs_log *log, int wait) | 1538 | void jfs_flush_journal(struct jfs_log *log, int wait) |
1537 | { | 1539 | { |
1538 | int i; | 1540 | int i; |
1539 | struct tblock *target = NULL; | 1541 | struct tblock *target = NULL; |
1540 | struct jfs_sb_info *sbi; | 1542 | struct jfs_sb_info *sbi; |
1541 | 1543 | ||
1542 | /* jfs_write_inode may call us during read-only mount */ | 1544 | /* jfs_write_inode may call us during read-only mount */ |
1543 | if (!log) | 1545 | if (!log) |
1544 | return; | 1546 | return; |
1545 | 1547 | ||
1546 | jfs_info("jfs_flush_journal: log:0x%p wait=%d", log, wait); | 1548 | jfs_info("jfs_flush_journal: log:0x%p wait=%d", log, wait); |
1547 | 1549 | ||
1548 | LOGGC_LOCK(log); | 1550 | LOGGC_LOCK(log); |
1549 | 1551 | ||
1550 | if (!list_empty(&log->cqueue)) { | 1552 | if (!list_empty(&log->cqueue)) { |
1551 | /* | 1553 | /* |
1552 | * This ensures that we will keep writing to the journal as long | 1554 | * This ensures that we will keep writing to the journal as long |
1553 | * as there are unwritten commit records | 1555 | * as there are unwritten commit records |
1554 | */ | 1556 | */ |
1555 | target = list_entry(log->cqueue.prev, struct tblock, cqueue); | 1557 | target = list_entry(log->cqueue.prev, struct tblock, cqueue); |
1556 | 1558 | ||
1557 | if (test_bit(log_FLUSH, &log->flag)) { | 1559 | if (test_bit(log_FLUSH, &log->flag)) { |
1558 | /* | 1560 | /* |
1559 | * We're already flushing. | 1561 | * We're already flushing. |
1560 | * if flush_tblk is NULL, we are flushing everything, | 1562 | * if flush_tblk is NULL, we are flushing everything, |
1561 | * so leave it that way. Otherwise, update it to the | 1563 | * so leave it that way. Otherwise, update it to the |
1562 | * latest transaction | 1564 | * latest transaction |
1563 | */ | 1565 | */ |
1564 | if (log->flush_tblk) | 1566 | if (log->flush_tblk) |
1565 | log->flush_tblk = target; | 1567 | log->flush_tblk = target; |
1566 | } else { | 1568 | } else { |
1567 | /* Only flush until latest transaction is committed */ | 1569 | /* Only flush until latest transaction is committed */ |
1568 | log->flush_tblk = target; | 1570 | log->flush_tblk = target; |
1569 | set_bit(log_FLUSH, &log->flag); | 1571 | set_bit(log_FLUSH, &log->flag); |
1570 | 1572 | ||
1571 | /* | 1573 | /* |
1572 | * Initiate I/O on outstanding transactions | 1574 | * Initiate I/O on outstanding transactions |
1573 | */ | 1575 | */ |
1574 | if (!(log->cflag & logGC_PAGEOUT)) { | 1576 | if (!(log->cflag & logGC_PAGEOUT)) { |
1575 | log->cflag |= logGC_PAGEOUT; | 1577 | log->cflag |= logGC_PAGEOUT; |
1576 | lmGCwrite(log, 0); | 1578 | lmGCwrite(log, 0); |
1577 | } | 1579 | } |
1578 | } | 1580 | } |
1579 | } | 1581 | } |
1580 | if ((wait > 1) || test_bit(log_SYNCBARRIER, &log->flag)) { | 1582 | if ((wait > 1) || test_bit(log_SYNCBARRIER, &log->flag)) { |
1581 | /* Flush until all activity complete */ | 1583 | /* Flush until all activity complete */ |
1582 | set_bit(log_FLUSH, &log->flag); | 1584 | set_bit(log_FLUSH, &log->flag); |
1583 | log->flush_tblk = NULL; | 1585 | log->flush_tblk = NULL; |
1584 | } | 1586 | } |
1585 | 1587 | ||
1586 | if (wait && target && !(target->flag & tblkGC_COMMITTED)) { | 1588 | if (wait && target && !(target->flag & tblkGC_COMMITTED)) { |
1587 | DECLARE_WAITQUEUE(__wait, current); | 1589 | DECLARE_WAITQUEUE(__wait, current); |
1588 | 1590 | ||
1589 | add_wait_queue(&target->gcwait, &__wait); | 1591 | add_wait_queue(&target->gcwait, &__wait); |
1590 | set_current_state(TASK_UNINTERRUPTIBLE); | 1592 | set_current_state(TASK_UNINTERRUPTIBLE); |
1591 | LOGGC_UNLOCK(log); | 1593 | LOGGC_UNLOCK(log); |
1592 | schedule(); | 1594 | schedule(); |
1593 | current->state = TASK_RUNNING; | 1595 | current->state = TASK_RUNNING; |
1594 | LOGGC_LOCK(log); | 1596 | LOGGC_LOCK(log); |
1595 | remove_wait_queue(&target->gcwait, &__wait); | 1597 | remove_wait_queue(&target->gcwait, &__wait); |
1596 | } | 1598 | } |
1597 | LOGGC_UNLOCK(log); | 1599 | LOGGC_UNLOCK(log); |
1598 | 1600 | ||
1599 | if (wait < 2) | 1601 | if (wait < 2) |
1600 | return; | 1602 | return; |
1601 | 1603 | ||
1602 | list_for_each_entry(sbi, &log->sb_list, log_list) { | 1604 | list_for_each_entry(sbi, &log->sb_list, log_list) { |
1603 | filemap_fdatawrite(sbi->ipbmap->i_mapping); | 1605 | filemap_fdatawrite(sbi->ipbmap->i_mapping); |
1604 | filemap_fdatawrite(sbi->ipimap->i_mapping); | 1606 | filemap_fdatawrite(sbi->ipimap->i_mapping); |
1605 | filemap_fdatawrite(sbi->direct_inode->i_mapping); | 1607 | filemap_fdatawrite(sbi->direct_inode->i_mapping); |
1606 | } | 1608 | } |
1607 | 1609 | ||
1608 | /* | 1610 | /* |
1609 | * If there was recent activity, we may need to wait | 1611 | * If there was recent activity, we may need to wait |
1610 | * for the lazycommit thread to catch up | 1612 | * for the lazycommit thread to catch up |
1611 | */ | 1613 | */ |
1612 | if ((!list_empty(&log->cqueue)) || !list_empty(&log->synclist)) { | 1614 | if ((!list_empty(&log->cqueue)) || !list_empty(&log->synclist)) { |
1613 | for (i = 0; i < 200; i++) { /* Too much? */ | 1615 | for (i = 0; i < 200; i++) { /* Too much? */ |
1614 | msleep(250); | 1616 | msleep(250); |
1615 | if (list_empty(&log->cqueue) && | 1617 | if (list_empty(&log->cqueue) && |
1616 | list_empty(&log->synclist)) | 1618 | list_empty(&log->synclist)) |
1617 | break; | 1619 | break; |
1618 | } | 1620 | } |
1619 | } | 1621 | } |
1620 | assert(list_empty(&log->cqueue)); | 1622 | assert(list_empty(&log->cqueue)); |
1621 | 1623 | ||
1622 | #ifdef CONFIG_JFS_DEBUG | 1624 | #ifdef CONFIG_JFS_DEBUG |
1623 | if (!list_empty(&log->synclist)) { | 1625 | if (!list_empty(&log->synclist)) { |
1624 | struct logsyncblk *lp; | 1626 | struct logsyncblk *lp; |
1625 | 1627 | ||
1626 | list_for_each_entry(lp, &log->synclist, synclist) { | 1628 | list_for_each_entry(lp, &log->synclist, synclist) { |
1627 | if (lp->xflag & COMMIT_PAGE) { | 1629 | if (lp->xflag & COMMIT_PAGE) { |
1628 | struct metapage *mp = (struct metapage *)lp; | 1630 | struct metapage *mp = (struct metapage *)lp; |
1629 | dump_mem("orphan metapage", lp, | 1631 | dump_mem("orphan metapage", lp, |
1630 | sizeof(struct metapage)); | 1632 | sizeof(struct metapage)); |
1631 | dump_mem("page", mp->page, sizeof(struct page)); | 1633 | dump_mem("page", mp->page, sizeof(struct page)); |
1632 | } | 1634 | } |
1633 | else | 1635 | else |
1634 | dump_mem("orphan tblock", lp, | 1636 | dump_mem("orphan tblock", lp, |
1635 | sizeof(struct tblock)); | 1637 | sizeof(struct tblock)); |
1636 | } | 1638 | } |
1637 | } | 1639 | } |
1638 | #endif | 1640 | #endif |
1639 | //assert(list_empty(&log->synclist)); | 1641 | //assert(list_empty(&log->synclist)); |
1640 | clear_bit(log_FLUSH, &log->flag); | 1642 | clear_bit(log_FLUSH, &log->flag); |
1641 | } | 1643 | } |
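When wait > 1, the tail of jfs_flush_journal() polls for the commit queue and synclist to drain, bounded at 200 iterations of 250 ms (about 50 seconds) before the assert fires. A userspace model of that bounded wait (names invented; nanosleep() stands in for msleep()):

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

static bool queues_drained(int *pending)
{
	if (*pending > 0)		/* pretend work completes over time */
		(*pending)--;
	return *pending == 0;
}

int main(void)
{
	struct timespec delay = { .tv_sec = 0, .tv_nsec = 250 * 1000 * 1000 };
	int pending = 3;		/* fake outstanding transactions */
	int i;

	for (i = 0; i < 200; i++) {	/* same bound as jfs_flush_journal() */
		nanosleep(&delay, NULL);	/* stands in for msleep(250) */
		if (queues_drained(&pending))
			break;
	}
	printf("drained after %d polls\n", i + 1);
	return 0;
}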
1642 | 1644 | ||
1643 | /* | 1645 | /* |
1644 | * NAME: lmLogShutdown() | 1646 | * NAME: lmLogShutdown() |
1645 | * | 1647 | * |
1646 | * FUNCTION: log shutdown at last LogClose(). | 1648 | * FUNCTION: log shutdown at last LogClose(). |
1647 | * | 1649 | * |
1648 | * write log syncpt record. | 1650 | * write log syncpt record. |
1649 | * update super block to set redone flag to 0. | 1651 | * update super block to set redone flag to 0. |
1650 | * | 1652 | * |
1651 | * PARAMETER: log - log inode | 1653 | * PARAMETER: log - log inode |
1652 | * | 1654 | * |
1653 | * RETURN: 0 - success | 1655 | * RETURN: 0 - success |
1654 | * | 1656 | * |
1655 | * serialization: single last close thread | 1657 | * serialization: single last close thread |
1656 | */ | 1658 | */ |
1657 | int lmLogShutdown(struct jfs_log * log) | 1659 | int lmLogShutdown(struct jfs_log * log) |
1658 | { | 1660 | { |
1659 | int rc; | 1661 | int rc; |
1660 | struct lrd lrd; | 1662 | struct lrd lrd; |
1661 | int lsn; | 1663 | int lsn; |
1662 | struct logsuper *logsuper; | 1664 | struct logsuper *logsuper; |
1663 | struct lbuf *bpsuper; | 1665 | struct lbuf *bpsuper; |
1664 | struct lbuf *bp; | 1666 | struct lbuf *bp; |
1665 | struct logpage *lp; | 1667 | struct logpage *lp; |
1666 | 1668 | ||
1667 | jfs_info("lmLogShutdown: log:0x%p", log); | 1669 | jfs_info("lmLogShutdown: log:0x%p", log); |
1668 | 1670 | ||
1669 | jfs_flush_journal(log, 2); | 1671 | jfs_flush_journal(log, 2); |
1670 | 1672 | ||
1671 | /* | 1673 | /* |
1672 | * write the last SYNCPT record with syncpoint = 0 | 1674 | * write the last SYNCPT record with syncpoint = 0 |
1673 | * (i.e., log redo up to HERE !) | 1675 | * (i.e., log redo up to HERE !) |
1674 | */ | 1676 | */ |
1675 | lrd.logtid = 0; | 1677 | lrd.logtid = 0; |
1676 | lrd.backchain = 0; | 1678 | lrd.backchain = 0; |
1677 | lrd.type = cpu_to_le16(LOG_SYNCPT); | 1679 | lrd.type = cpu_to_le16(LOG_SYNCPT); |
1678 | lrd.length = 0; | 1680 | lrd.length = 0; |
1679 | lrd.log.syncpt.sync = 0; | 1681 | lrd.log.syncpt.sync = 0; |
1680 | 1682 | ||
1681 | lsn = lmWriteRecord(log, NULL, &lrd, NULL); | 1683 | lsn = lmWriteRecord(log, NULL, &lrd, NULL); |
1682 | bp = log->bp; | 1684 | bp = log->bp; |
1683 | lp = (struct logpage *) bp->l_ldata; | 1685 | lp = (struct logpage *) bp->l_ldata; |
1684 | lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor); | 1686 | lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor); |
1685 | lbmWrite(log, log->bp, lbmWRITE | lbmRELEASE | lbmSYNC, 0); | 1687 | lbmWrite(log, log->bp, lbmWRITE | lbmRELEASE | lbmSYNC, 0); |
1686 | lbmIOWait(log->bp, lbmFREE); | 1688 | lbmIOWait(log->bp, lbmFREE); |
1687 | log->bp = NULL; | 1689 | log->bp = NULL; |
1688 | 1690 | ||
1689 | /* | 1691 | /* |
1690 | * synchronous update log superblock | 1692 | * synchronous update log superblock |
1691 | * mark log state as shutdown cleanly | 1693 | * mark log state as shutdown cleanly |
1692 | * (i.e., Log does not need to be replayed). | 1694 | * (i.e., Log does not need to be replayed). |
1693 | */ | 1695 | */ |
1694 | if ((rc = lbmRead(log, 1, &bpsuper))) | 1696 | if ((rc = lbmRead(log, 1, &bpsuper))) |
1695 | goto out; | 1697 | goto out; |
1696 | 1698 | ||
1697 | logsuper = (struct logsuper *) bpsuper->l_ldata; | 1699 | logsuper = (struct logsuper *) bpsuper->l_ldata; |
1698 | logsuper->state = cpu_to_le32(LOGREDONE); | 1700 | logsuper->state = cpu_to_le32(LOGREDONE); |
1699 | logsuper->end = cpu_to_le32(lsn); | 1701 | logsuper->end = cpu_to_le32(lsn); |
1700 | lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC); | 1702 | lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC); |
1701 | rc = lbmIOWait(bpsuper, lbmFREE); | 1703 | rc = lbmIOWait(bpsuper, lbmFREE); |
1702 | 1704 | ||
1703 | jfs_info("lmLogShutdown: lsn:0x%x page:%d eor:%d", | 1705 | jfs_info("lmLogShutdown: lsn:0x%x page:%d eor:%d", |
1704 | lsn, log->page, log->eor); | 1706 | lsn, log->page, log->eor); |
1705 | 1707 | ||
1706 | out: | 1708 | out: |
1707 | /* | 1709 | /* |
1708 | * shutdown per log i/o | 1710 | * shutdown per log i/o |
1709 | */ | 1711 | */ |
1710 | lbmLogShutdown(log); | 1712 | lbmLogShutdown(log); |
1711 | 1713 | ||
1712 | if (rc) { | 1714 | if (rc) { |
1713 | jfs_warn("lmLogShutdown: exit(%d)", rc); | 1715 | jfs_warn("lmLogShutdown: exit(%d)", rc); |
1714 | } | 1716 | } |
1715 | return rc; | 1717 | return rc; |
1716 | } | 1718 | } |
1717 | 1719 | ||
1718 | 1720 | ||
1719 | /* | 1721 | /* |
1720 | * NAME: lmLogFileSystem() | 1722 | * NAME: lmLogFileSystem() |
1721 | * | 1723 | * |
1722 | * FUNCTION: insert (<activate> = true)/remove (<activate> = false) | 1724 | * FUNCTION: insert (<activate> = true)/remove (<activate> = false) |
1723 | * file system into/from log active file system list. | 1725 | * file system into/from log active file system list. |
1724 | * | 1726 | * |
1725 | * PARAMETER: log - pointer to log's inode. | 1727 | * PARAMETER: log - pointer to log's inode. |
1726 | * fsdev - kdev_t of filesystem. | 1728 | * fsdev - kdev_t of filesystem. |
1727 | * serial - pointer to returned log serial number | 1729 | * serial - pointer to returned log serial number |
1728 | * activate - insert/remove device from active list. | 1730 | * activate - insert/remove device from active list. |
1729 | * | 1731 | * |
1730 | * RETURN: 0 - success | 1732 | * RETURN: 0 - success |
1731 | * errors returned by lbmIOWait(). | 1733 | * errors returned by lbmIOWait(). |
1732 | */ | 1734 | */ |
1733 | static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi, | 1735 | static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi, |
1734 | int activate) | 1736 | int activate) |
1735 | { | 1737 | { |
1736 | int rc = 0; | 1738 | int rc = 0; |
1737 | int i; | 1739 | int i; |
1738 | struct logsuper *logsuper; | 1740 | struct logsuper *logsuper; |
1739 | struct lbuf *bpsuper; | 1741 | struct lbuf *bpsuper; |
1740 | char *uuid = sbi->uuid; | 1742 | char *uuid = sbi->uuid; |
1741 | 1743 | ||
1742 | /* | 1744 | /* |
1743 | * insert/remove file system device to/from the log active file system list. | 1745 | * insert/remove file system device to/from the log active file system list. |
1744 | */ | 1746 | */ |
1745 | if ((rc = lbmRead(log, 1, &bpsuper))) | 1747 | if ((rc = lbmRead(log, 1, &bpsuper))) |
1746 | return rc; | 1748 | return rc; |
1747 | 1749 | ||
1748 | logsuper = (struct logsuper *) bpsuper->l_ldata; | 1750 | logsuper = (struct logsuper *) bpsuper->l_ldata; |
1749 | if (activate) { | 1751 | if (activate) { |
1750 | for (i = 0; i < MAX_ACTIVE; i++) | 1752 | for (i = 0; i < MAX_ACTIVE; i++) |
1751 | if (!memcmp(logsuper->active[i].uuid, NULL_UUID, 16)) { | 1753 | if (!memcmp(logsuper->active[i].uuid, NULL_UUID, 16)) { |
1752 | memcpy(logsuper->active[i].uuid, uuid, 16); | 1754 | memcpy(logsuper->active[i].uuid, uuid, 16); |
1753 | sbi->aggregate = i; | 1755 | sbi->aggregate = i; |
1754 | break; | 1756 | break; |
1755 | } | 1757 | } |
1756 | if (i == MAX_ACTIVE) { | 1758 | if (i == MAX_ACTIVE) { |
1757 | jfs_warn("Too many file systems sharing journal!"); | 1759 | jfs_warn("Too many file systems sharing journal!"); |
1758 | lbmFree(bpsuper); | 1760 | lbmFree(bpsuper); |
1759 | return -EMFILE; /* Is there a better rc? */ | 1761 | return -EMFILE; /* Is there a better rc? */ |
1760 | } | 1762 | } |
1761 | } else { | 1763 | } else { |
1762 | for (i = 0; i < MAX_ACTIVE; i++) | 1764 | for (i = 0; i < MAX_ACTIVE; i++) |
1763 | if (!memcmp(logsuper->active[i].uuid, uuid, 16)) { | 1765 | if (!memcmp(logsuper->active[i].uuid, uuid, 16)) { |
1764 | memcpy(logsuper->active[i].uuid, NULL_UUID, 16); | 1766 | memcpy(logsuper->active[i].uuid, NULL_UUID, 16); |
1765 | break; | 1767 | break; |
1766 | } | 1768 | } |
1767 | if (i == MAX_ACTIVE) { | 1769 | if (i == MAX_ACTIVE) { |
1768 | jfs_warn("Somebody stomped on the journal!"); | 1770 | jfs_warn("Somebody stomped on the journal!"); |
1769 | lbmFree(bpsuper); | 1771 | lbmFree(bpsuper); |
1770 | return -EIO; | 1772 | return -EIO; |
1771 | } | 1773 | } |
1772 | 1774 | ||
1773 | } | 1775 | } |
1774 | 1776 | ||
1775 | /* | 1777 | /* |
1776 | * synchronous write log superblock: | 1778 | * synchronous write log superblock: |
1777 | * | 1779 | * |
1778 | * write sidestream bypassing write queue: | 1780 | * write sidestream bypassing write queue: |
1779 | * at file system mount, log super block is updated for | 1781 | * at file system mount, log super block is updated for |
1780 | * activation of the file system before any log record | 1782 | * activation of the file system before any log record |
1781 | * (MOUNT record) of the file system, and at file system | 1783 | * (MOUNT record) of the file system, and at file system |
1782 | * unmount, all meta data for the file system has been | 1784 | * unmount, all meta data for the file system has been |
1783 | * flushed before log super block is updated for deactivation | 1785 | * flushed before log super block is updated for deactivation |
1784 | * of the file system. | 1786 | * of the file system. |
1785 | */ | 1787 | */ |
1786 | lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC); | 1788 | lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC); |
1787 | rc = lbmIOWait(bpsuper, lbmFREE); | 1789 | rc = lbmIOWait(bpsuper, lbmFREE); |
1788 | 1790 | ||
1789 | return rc; | 1791 | return rc; |
1790 | } | 1792 | } |
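The active list in the log superblock is a fixed table of MAX_ACTIVE uuid slots: activation claims the first all-zero slot, deactivation zeroes the matching one, and a full table produces the "Too many file systems" error above. A small standalone model of the activation scan (table size and contents invented):

#include <stdio.h>
#include <string.h>

#define MAX_ACTIVE 4			/* small table for illustration */

static const char NULL_UUID[16];	/* all zeroes marks a free slot */

int main(void)
{
	char active[MAX_ACTIVE][16] = { "fs-one" };	/* slot 0 in use */
	const char uuid[16] = "fs-two";
	int i;

	/* activation: claim the first free slot, as lmLogFileSystem() does */
	for (i = 0; i < MAX_ACTIVE; i++)
		if (!memcmp(active[i], NULL_UUID, 16)) {
			memcpy(active[i], uuid, 16);
			break;
		}

	if (i == MAX_ACTIVE)
		puts("Too many file systems sharing journal!");
	else
		printf("activated in slot %d\n", i);
	return 0;
}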
1791 | 1793 | ||
1792 | /* | 1794 | /* |
1793 | * log buffer manager (lbm) | 1795 | * log buffer manager (lbm) |
1794 | * ------------------------ | 1796 | * ------------------------ |
1795 | * | 1797 | * |
1796 | * special purpose buffer manager supporting log i/o requirements. | 1798 | * special purpose buffer manager supporting log i/o requirements. |
1797 | * | 1799 | * |
1798 | * per log write queue: | 1800 | * per log write queue: |
1799 | * log pageout occurs in serial order by fifo write queue and | 1801 | * log pageout occurs in serial order by fifo write queue and |
1800 | * restricting to a single i/o in progress at any one time. | 1802 | * restricting to a single i/o in progress at any one time. |
1801 | * a circular singly-linked list | 1803 | * a circular singly-linked list |
1802 | * (log->wqueue points to the tail, and buffers are linked via the | 1804 | * (log->wqueue points to the tail, and buffers are linked via the |
1803 | * bp->l_wqnext field), and | 1805 | * bp->l_wqnext field), and |
1804 | * maintains log pages in pageout or waiting for pageout in serial order. | 1806 | * maintains log pages in pageout or waiting for pageout in serial order. |
1805 | */ | 1807 | */ |
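Keeping only the tail of a circular singly-linked list gives O(1) access to both ends: the tail directly, and the head as tail->next. A minimal sketch of that queue shape (types invented; next plays the role of bp->l_wqnext):

#include <stdio.h>

struct qbuf {
	int pn;			/* log page number */
	struct qbuf *next;	/* plays the role of bp->l_wqnext */
};

/* tail is the only handle kept, like log->wqueue; tail->next is the head */
static void enqueue(struct qbuf **tail, struct qbuf *bp)
{
	if (*tail == NULL)
		bp->next = bp;			/* single element points at itself */
	else {
		bp->next = (*tail)->next;	/* new tail points at head */
		(*tail)->next = bp;
	}
	*tail = bp;
}

static struct qbuf *head(struct qbuf *tail)
{
	return tail ? tail->next : NULL;
}

int main(void)
{
	struct qbuf a = { .pn = 1 }, b = { .pn = 2 };
	struct qbuf *wqueue = NULL;

	enqueue(&wqueue, &a);
	enqueue(&wqueue, &b);
	printf("head page %d, tail page %d\n", head(wqueue)->pn, wqueue->pn);
	return 0;
}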
1806 | 1808 | ||
1807 | /* | 1809 | /* |
1808 | * lbmLogInit() | 1810 | * lbmLogInit() |
1809 | * | 1811 | * |
1810 | * initialize per log I/O setup at lmLogInit() | 1812 | * initialize per log I/O setup at lmLogInit() |
1811 | */ | 1813 | */ |
1812 | static int lbmLogInit(struct jfs_log * log) | 1814 | static int lbmLogInit(struct jfs_log * log) |
1813 | { /* log inode */ | 1815 | { /* log inode */ |
1814 | int i; | 1816 | int i; |
1815 | struct lbuf *lbuf; | 1817 | struct lbuf *lbuf; |
1816 | 1818 | ||
1817 | jfs_info("lbmLogInit: log:0x%p", log); | 1819 | jfs_info("lbmLogInit: log:0x%p", log); |
1818 | 1820 | ||
1819 | /* initialize current buffer cursor */ | 1821 | /* initialize current buffer cursor */ |
1820 | log->bp = NULL; | 1822 | log->bp = NULL; |
1821 | 1823 | ||
1822 | /* initialize log device write queue */ | 1824 | /* initialize log device write queue */ |
1823 | log->wqueue = NULL; | 1825 | log->wqueue = NULL; |
1824 | 1826 | ||
1825 | /* | 1827 | /* |
1826 | * Each log has its own buffer pages allocated to it. These are | 1828 | * Each log has its own buffer pages allocated to it. These are |
1827 | * not managed by the page cache. This ensures that a transaction | 1829 | * not managed by the page cache. This ensures that a transaction |
1828 | * writing to the log does not block trying to allocate a page from | 1830 | * writing to the log does not block trying to allocate a page from |
1829 | * the page cache (for the log). This would be bad, since page | 1831 | * the page cache (for the log). This would be bad, since page |
1830 | * allocation waits on the kswapd thread that may be committing inodes | 1832 | * allocation waits on the kswapd thread that may be committing inodes |
1831 | * which would cause log activity. Was that clear? I'm trying to | 1833 | * which would cause log activity. Was that clear? I'm trying to |
1832 | * avoid deadlock here. | 1834 | * avoid deadlock here. |
1833 | */ | 1835 | */ |
1834 | init_waitqueue_head(&log->free_wait); | 1836 | init_waitqueue_head(&log->free_wait); |
1835 | 1837 | ||
1836 | log->lbuf_free = NULL; | 1838 | log->lbuf_free = NULL; |
1837 | 1839 | ||
1838 | for (i = 0; i < LOGPAGES;) { | 1840 | for (i = 0; i < LOGPAGES;) { |
1839 | char *buffer; | 1841 | char *buffer; |
1840 | uint offset; | 1842 | uint offset; |
1841 | struct page *page; | 1843 | struct page *page; |
1842 | 1844 | ||
1843 | buffer = (char *) get_zeroed_page(GFP_KERNEL); | 1845 | buffer = (char *) get_zeroed_page(GFP_KERNEL); |
1844 | if (buffer == NULL) | 1846 | if (buffer == NULL) |
1845 | goto error; | 1847 | goto error; |
1846 | page = virt_to_page(buffer); | 1848 | page = virt_to_page(buffer); |
1847 | for (offset = 0; offset < PAGE_SIZE; offset += LOGPSIZE) { | 1849 | for (offset = 0; offset < PAGE_SIZE; offset += LOGPSIZE) { |
1848 | lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL); | 1850 | lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL); |
1849 | if (lbuf == NULL) { | 1851 | if (lbuf == NULL) { |
1850 | if (offset == 0) | 1852 | if (offset == 0) |
1851 | free_page((unsigned long) buffer); | 1853 | free_page((unsigned long) buffer); |
1852 | goto error; | 1854 | goto error; |
1853 | } | 1855 | } |
1854 | if (offset) /* we already have one reference */ | 1856 | if (offset) /* we already have one reference */ |
1855 | get_page(page); | 1857 | get_page(page); |
1856 | lbuf->l_offset = offset; | 1858 | lbuf->l_offset = offset; |
1857 | lbuf->l_ldata = buffer + offset; | 1859 | lbuf->l_ldata = buffer + offset; |
1858 | lbuf->l_page = page; | 1860 | lbuf->l_page = page; |
1859 | lbuf->l_log = log; | 1861 | lbuf->l_log = log; |
1860 | init_waitqueue_head(&lbuf->l_ioevent); | 1862 | init_waitqueue_head(&lbuf->l_ioevent); |
1861 | 1863 | ||
1862 | lbuf->l_freelist = log->lbuf_free; | 1864 | lbuf->l_freelist = log->lbuf_free; |
1863 | log->lbuf_free = lbuf; | 1865 | log->lbuf_free = lbuf; |
1864 | i++; | 1866 | i++; |
1865 | } | 1867 | } |
1866 | } | 1868 | } |
1867 | 1869 | ||
1868 | return (0); | 1870 | return (0); |
1869 | 1871 | ||
1870 | error: | 1872 | error: |
1871 | lbmLogShutdown(log); | 1873 | lbmLogShutdown(log); |
1872 | return -ENOMEM; | 1874 | return -ENOMEM; |
1873 | } | 1875 | } |
1874 | 1876 | ||
1875 | 1877 | ||
1876 | /* | 1878 | /* |
1877 | * lbmLogShutdown() | 1879 | * lbmLogShutdown() |
1878 | * | 1880 | * |
1879 | * finalize per log I/O setup at lmLogShutdown() | 1881 | * finalize per log I/O setup at lmLogShutdown() |
1880 | */ | 1882 | */ |
1881 | static void lbmLogShutdown(struct jfs_log * log) | 1883 | static void lbmLogShutdown(struct jfs_log * log) |
1882 | { | 1884 | { |
1883 | struct lbuf *lbuf; | 1885 | struct lbuf *lbuf; |
1884 | 1886 | ||
1885 | jfs_info("lbmLogShutdown: log:0x%p", log); | 1887 | jfs_info("lbmLogShutdown: log:0x%p", log); |
1886 | 1888 | ||
1887 | lbuf = log->lbuf_free; | 1889 | lbuf = log->lbuf_free; |
1888 | while (lbuf) { | 1890 | while (lbuf) { |
1889 | struct lbuf *next = lbuf->l_freelist; | 1891 | struct lbuf *next = lbuf->l_freelist; |
1890 | __free_page(lbuf->l_page); | 1892 | __free_page(lbuf->l_page); |
1891 | kfree(lbuf); | 1893 | kfree(lbuf); |
1892 | lbuf = next; | 1894 | lbuf = next; |
1893 | } | 1895 | } |
1894 | } | 1896 | } |
1895 | 1897 | ||
1896 | 1898 | ||
1897 | /* | 1899 | /* |
1898 | * lbmAllocate() | 1900 | * lbmAllocate() |
1899 | * | 1901 | * |
1900 | * allocate an empty log buffer | 1902 | * allocate an empty log buffer |
1901 | */ | 1903 | */ |
1902 | static struct lbuf *lbmAllocate(struct jfs_log * log, int pn) | 1904 | static struct lbuf *lbmAllocate(struct jfs_log * log, int pn) |
1903 | { | 1905 | { |
1904 | struct lbuf *bp; | 1906 | struct lbuf *bp; |
1905 | unsigned long flags; | 1907 | unsigned long flags; |
1906 | 1908 | ||
1907 | /* | 1909 | /* |
1908 | * recycle from log buffer freelist if any | 1910 | * recycle from log buffer freelist if any |
1909 | */ | 1911 | */ |
1910 | LCACHE_LOCK(flags); | 1912 | LCACHE_LOCK(flags); |
1911 | LCACHE_SLEEP_COND(log->free_wait, (bp = log->lbuf_free), flags); | 1913 | LCACHE_SLEEP_COND(log->free_wait, (bp = log->lbuf_free), flags); |
1912 | log->lbuf_free = bp->l_freelist; | 1914 | log->lbuf_free = bp->l_freelist; |
1913 | LCACHE_UNLOCK(flags); | 1915 | LCACHE_UNLOCK(flags); |
1914 | 1916 | ||
1915 | bp->l_flag = 0; | 1917 | bp->l_flag = 0; |
1916 | 1918 | ||
1917 | bp->l_wqnext = NULL; | 1919 | bp->l_wqnext = NULL; |
1918 | bp->l_freelist = NULL; | 1920 | bp->l_freelist = NULL; |
1919 | 1921 | ||
1920 | bp->l_pn = pn; | 1922 | bp->l_pn = pn; |
1921 | bp->l_blkno = log->base + (pn << (L2LOGPSIZE - log->l2bsize)); | 1923 | bp->l_blkno = log->base + (pn << (L2LOGPSIZE - log->l2bsize)); |
1922 | bp->l_ceor = 0; | 1924 | bp->l_ceor = 0; |
1923 | 1925 | ||
1924 | return bp; | 1926 | return bp; |
1925 | } | 1927 | } |
1926 | 1928 | ||
1927 | 1929 | ||
1928 | /* | 1930 | /* |
1929 | * lbmFree() | 1931 | * lbmFree() |
1930 | * | 1932 | * |
1931 | * release a log buffer to freelist | 1933 | * release a log buffer to freelist |
1932 | */ | 1934 | */ |
1933 | static void lbmFree(struct lbuf * bp) | 1935 | static void lbmFree(struct lbuf * bp) |
1934 | { | 1936 | { |
1935 | unsigned long flags; | 1937 | unsigned long flags; |
1936 | 1938 | ||
1937 | LCACHE_LOCK(flags); | 1939 | LCACHE_LOCK(flags); |
1938 | 1940 | ||
1939 | lbmfree(bp); | 1941 | lbmfree(bp); |
1940 | 1942 | ||
1941 | LCACHE_UNLOCK(flags); | 1943 | LCACHE_UNLOCK(flags); |
1942 | } | 1944 | } |
1943 | 1945 | ||
1944 | static void lbmfree(struct lbuf * bp) | 1946 | static void lbmfree(struct lbuf * bp) |
1945 | { | 1947 | { |
1946 | struct jfs_log *log = bp->l_log; | 1948 | struct jfs_log *log = bp->l_log; |
1947 | 1949 | ||
1948 | assert(bp->l_wqnext == NULL); | 1950 | assert(bp->l_wqnext == NULL); |
1949 | 1951 | ||
1950 | /* | 1952 | /* |
1951 | * return the buffer to head of freelist | 1953 | * return the buffer to head of freelist |
1952 | */ | 1954 | */ |
1953 | bp->l_freelist = log->lbuf_free; | 1955 | bp->l_freelist = log->lbuf_free; |
1954 | log->lbuf_free = bp; | 1956 | log->lbuf_free = bp; |
1955 | 1957 | ||
1956 | wake_up(&log->free_wait); | 1958 | wake_up(&log->free_wait); |
1957 | return; | 1959 | return; |
1958 | } | 1960 | } |
1959 | 1961 | ||
1960 | 1962 | ||
1961 | /* | 1963 | /* |
1962 | * NAME: lbmRedrive | 1964 | * NAME: lbmRedrive |
1963 | * | 1965 | * |
1964 | * FUNCTION: add a log buffer to the log redrive list | 1966 | * FUNCTION: add a log buffer to the log redrive list |
1965 | * | 1967 | * |
1966 | * PARAMETER: | 1968 | * PARAMETER: |
1967 | * bp - log buffer | 1969 | * bp - log buffer |
1968 | * | 1970 | * |
1969 | * NOTES: | 1971 | * NOTES: |
1970 | * Takes log_redrive_lock. | 1972 | * Takes log_redrive_lock. |
1971 | */ | 1973 | */ |
1972 | static inline void lbmRedrive(struct lbuf *bp) | 1974 | static inline void lbmRedrive(struct lbuf *bp) |
1973 | { | 1975 | { |
1974 | unsigned long flags; | 1976 | unsigned long flags; |
1975 | 1977 | ||
1976 | spin_lock_irqsave(&log_redrive_lock, flags); | 1978 | spin_lock_irqsave(&log_redrive_lock, flags); |
1977 | bp->l_redrive_next = log_redrive_list; | 1979 | bp->l_redrive_next = log_redrive_list; |
1978 | log_redrive_list = bp; | 1980 | log_redrive_list = bp; |
1979 | spin_unlock_irqrestore(&log_redrive_lock, flags); | 1981 | spin_unlock_irqrestore(&log_redrive_lock, flags); |
1980 | 1982 | ||
1981 | wake_up(&jfs_IO_thread_wait); | 1983 | wake_up(&jfs_IO_thread_wait); |
1982 | } | 1984 | } |
1983 | 1985 | ||
1984 | 1986 | ||
1985 | /* | 1987 | /* |
1986 | * lbmRead() | 1988 | * lbmRead() |
1987 | */ | 1989 | */ |
1988 | static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp) | 1990 | static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp) |
1989 | { | 1991 | { |
1990 | struct bio *bio; | 1992 | struct bio *bio; |
1991 | struct lbuf *bp; | 1993 | struct lbuf *bp; |
1992 | 1994 | ||
1993 | /* | 1995 | /* |
1994 | * allocate a log buffer | 1996 | * allocate a log buffer |
1995 | */ | 1997 | */ |
1996 | *bpp = bp = lbmAllocate(log, pn); | 1998 | *bpp = bp = lbmAllocate(log, pn); |
1997 | jfs_info("lbmRead: bp:0x%p pn:0x%x", bp, pn); | 1999 | jfs_info("lbmRead: bp:0x%p pn:0x%x", bp, pn); |
1998 | 2000 | ||
1999 | bp->l_flag |= lbmREAD; | 2001 | bp->l_flag |= lbmREAD; |
2000 | 2002 | ||
2001 | bio = bio_alloc(GFP_NOFS, 1); | 2003 | bio = bio_alloc(GFP_NOFS, 1); |
2002 | 2004 | ||
2003 | bio->bi_sector = bp->l_blkno << (log->l2bsize - 9); | 2005 | bio->bi_sector = bp->l_blkno << (log->l2bsize - 9); |
2004 | bio->bi_bdev = log->bdev; | 2006 | bio->bi_bdev = log->bdev; |
2005 | bio->bi_io_vec[0].bv_page = bp->l_page; | 2007 | bio->bi_io_vec[0].bv_page = bp->l_page; |
2006 | bio->bi_io_vec[0].bv_len = LOGPSIZE; | 2008 | bio->bi_io_vec[0].bv_len = LOGPSIZE; |
2007 | bio->bi_io_vec[0].bv_offset = bp->l_offset; | 2009 | bio->bi_io_vec[0].bv_offset = bp->l_offset; |
2008 | 2010 | ||
2009 | bio->bi_vcnt = 1; | 2011 | bio->bi_vcnt = 1; |
2010 | bio->bi_idx = 0; | 2012 | bio->bi_idx = 0; |
2011 | bio->bi_size = LOGPSIZE; | 2013 | bio->bi_size = LOGPSIZE; |
2012 | 2014 | ||
2013 | bio->bi_end_io = lbmIODone; | 2015 | bio->bi_end_io = lbmIODone; |
2014 | bio->bi_private = bp; | 2016 | bio->bi_private = bp; |
2015 | submit_bio(READ_SYNC, bio); | 2017 | submit_bio(READ_SYNC, bio); |
2016 | 2018 | ||
2017 | wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD)); | 2019 | wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD)); |
2018 | 2020 | ||
2019 | return 0; | 2021 | return 0; |
2020 | } | 2022 | } |
2021 | 2023 | ||
2022 | 2024 | ||
2023 | /* | 2025 | /* |
2024 | * lbmWrite() | 2026 | * lbmWrite() |
2025 | * | 2027 | * |
2026 | * buffer at head of pageout queue stays after completion of | 2028 | * buffer at head of pageout queue stays after completion of |
2027 | * partial-page pageout and is redriven by explicit initiation of | 2029 | * partial-page pageout and is redriven by explicit initiation of |
2028 | * pageout by caller until full-page pageout is completed and | 2030 | * pageout by caller until full-page pageout is completed and |
2029 | * released. | 2031 | * released. |
2030 | * | 2032 | * |
2031 | * device driver i/o done redrives pageout of new buffer at | 2033 | * device driver i/o done redrives pageout of new buffer at |
2032 | * head of pageout queue when current buffer at head of pageout | 2034 | * head of pageout queue when current buffer at head of pageout |
2033 | * queue is released at the completion of its full-page pageout. | 2035 | * queue is released at the completion of its full-page pageout. |
2034 | * | 2036 | * |
2035 | * LOGGC_LOCK() serializes lbmWrite() by lmNextPage() and lmGroupCommit(). | 2037 | * LOGGC_LOCK() serializes lbmWrite() by lmNextPage() and lmGroupCommit(). |
2036 | * LCACHE_LOCK() serializes xflag between lbmWrite() and lbmIODone() | 2038 | * LCACHE_LOCK() serializes xflag between lbmWrite() and lbmIODone() |
2037 | */ | 2039 | */ |
2038 | static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag, | 2040 | static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag, |
2039 | int cant_block) | 2041 | int cant_block) |
2040 | { | 2042 | { |
2041 | struct lbuf *tail; | 2043 | struct lbuf *tail; |
2042 | unsigned long flags; | 2044 | unsigned long flags; |
2043 | 2045 | ||
2044 | jfs_info("lbmWrite: bp:0x%p flag:0x%x pn:0x%x", bp, flag, bp->l_pn); | 2046 | jfs_info("lbmWrite: bp:0x%p flag:0x%x pn:0x%x", bp, flag, bp->l_pn); |
2045 | 2047 | ||
2046 | /* map the logical block address to physical block address */ | 2048 | /* map the logical block address to physical block address */ |
2047 | bp->l_blkno = | 2049 | bp->l_blkno = |
2048 | log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize)); | 2050 | log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize)); |
2049 | 2051 | ||
2050 | LCACHE_LOCK(flags); /* disable+lock */ | 2052 | LCACHE_LOCK(flags); /* disable+lock */ |
2051 | 2053 | ||
2052 | /* | 2054 | /* |
2053 | * initialize buffer for device driver | 2055 | * initialize buffer for device driver |
2054 | */ | 2056 | */ |
2055 | bp->l_flag = flag; | 2057 | bp->l_flag = flag; |
2056 | 2058 | ||
2057 | /* | 2059 | /* |
2058 | * insert bp at tail of write queue associated with log | 2060 | * insert bp at tail of write queue associated with log |
2059 | * | 2061 | * |
2060 | * (request is either for bp already/currently at head of queue | 2062 | * (request is either for bp already/currently at head of queue |
2061 | * or new bp to be inserted at tail) | 2063 | * or new bp to be inserted at tail) |
2062 | */ | 2064 | */ |
2063 | tail = log->wqueue; | 2065 | tail = log->wqueue; |
2064 | 2066 | ||
2065 | /* is buffer not already on write queue ? */ | 2067 | /* is buffer not already on write queue ? */ |
2066 | if (bp->l_wqnext == NULL) { | 2068 | if (bp->l_wqnext == NULL) { |
2067 | /* insert at tail of wqueue */ | 2069 | /* insert at tail of wqueue */ |
2068 | if (tail == NULL) { | 2070 | if (tail == NULL) { |
2069 | log->wqueue = bp; | 2071 | log->wqueue = bp; |
2070 | bp->l_wqnext = bp; | 2072 | bp->l_wqnext = bp; |
2071 | } else { | 2073 | } else { |
2072 | log->wqueue = bp; | 2074 | log->wqueue = bp; |
2073 | bp->l_wqnext = tail->l_wqnext; | 2075 | bp->l_wqnext = tail->l_wqnext; |
2074 | tail->l_wqnext = bp; | 2076 | tail->l_wqnext = bp; |
2075 | } | 2077 | } |
2076 | 2078 | ||
2077 | tail = bp; | 2079 | tail = bp; |
2078 | } | 2080 | } |
2079 | 2081 | ||
2080 | /* is buffer at head of wqueue and for write ? */ | 2082 | /* is buffer at head of wqueue and for write ? */ |
2081 | if ((bp != tail->l_wqnext) || !(flag & lbmWRITE)) { | 2083 | if ((bp != tail->l_wqnext) || !(flag & lbmWRITE)) { |
2082 | LCACHE_UNLOCK(flags); /* unlock+enable */ | 2084 | LCACHE_UNLOCK(flags); /* unlock+enable */ |
2083 | return; | 2085 | return; |
2084 | } | 2086 | } |
2085 | 2087 | ||
2086 | LCACHE_UNLOCK(flags); /* unlock+enable */ | 2088 | LCACHE_UNLOCK(flags); /* unlock+enable */ |
2087 | 2089 | ||
2088 | if (cant_block) | 2090 | if (cant_block) |
2089 | lbmRedrive(bp); | 2091 | lbmRedrive(bp); |
2090 | else if (flag & lbmSYNC) | 2092 | else if (flag & lbmSYNC) |
2091 | lbmStartIO(bp); | 2093 | lbmStartIO(bp); |
2092 | else { | 2094 | else { |
2093 | LOGGC_UNLOCK(log); | 2095 | LOGGC_UNLOCK(log); |
2094 | lbmStartIO(bp); | 2096 | lbmStartIO(bp); |
2095 | LOGGC_LOCK(log); | 2097 | LOGGC_LOCK(log); |
2096 | } | 2098 | } |
2097 | } | 2099 | } |
2098 | 2100 | ||
2099 | 2101 | ||
2100 | /* | 2102 | /* |
2101 | * lbmDirectWrite() | 2103 | * lbmDirectWrite() |
2102 | * | 2104 | * |
2103 | * initiate pageout bypassing write queue for sidestream | 2105 | * initiate pageout bypassing write queue for sidestream |
2104 | * (e.g., log superblock) write; | 2106 | * (e.g., log superblock) write; |
2105 | */ | 2107 | */ |
2106 | static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag) | 2108 | static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag) |
2107 | { | 2109 | { |
2108 | jfs_info("lbmDirectWrite: bp:0x%p flag:0x%x pn:0x%x", | 2110 | jfs_info("lbmDirectWrite: bp:0x%p flag:0x%x pn:0x%x", |
2109 | bp, flag, bp->l_pn); | 2111 | bp, flag, bp->l_pn); |
2110 | 2112 | ||
2111 | /* | 2113 | /* |
2112 | * initialize buffer for device driver | 2114 | * initialize buffer for device driver |
2113 | */ | 2115 | */ |
2114 | bp->l_flag = flag | lbmDIRECT; | 2116 | bp->l_flag = flag | lbmDIRECT; |
2115 | 2117 | ||
2116 | /* map the logical block address to physical block address */ | 2118 | /* map the logical block address to physical block address */ |
2117 | bp->l_blkno = | 2119 | bp->l_blkno = |
2118 | log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize)); | 2120 | log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize)); |
2119 | 2121 | ||
2120 | /* | 2122 | /* |
2121 | * initiate pageout of the page | 2123 | * initiate pageout of the page |
2122 | */ | 2124 | */ |
2123 | lbmStartIO(bp); | 2125 | lbmStartIO(bp); |
2124 | } | 2126 | } |
2125 | 2127 | ||
2126 | 2128 | ||
2127 | /* | 2129 | /* |
2128 | * NAME: lbmStartIO() | 2130 | * NAME: lbmStartIO() |
2129 | * | 2131 | * |
2130 | * FUNCTION: Interface to DD strategy routine | 2132 | * FUNCTION: Interface to DD strategy routine |
2131 | * | 2133 | * |
2132 | * RETURN: none | 2134 | * RETURN: none |
2133 | * | 2135 | * |
2134 | * serialization: LCACHE_LOCK() is NOT held during log i/o; | 2136 | * serialization: LCACHE_LOCK() is NOT held during log i/o; |
2135 | */ | 2137 | */ |
2136 | static void lbmStartIO(struct lbuf * bp) | 2138 | static void lbmStartIO(struct lbuf * bp) |
2137 | { | 2139 | { |
2138 | struct bio *bio; | 2140 | struct bio *bio; |
2139 | struct jfs_log *log = bp->l_log; | 2141 | struct jfs_log *log = bp->l_log; |
2140 | 2142 | ||
2141 | jfs_info("lbmStartIO\n"); | 2143 | jfs_info("lbmStartIO\n"); |
2142 | 2144 | ||
2143 | bio = bio_alloc(GFP_NOFS, 1); | 2145 | bio = bio_alloc(GFP_NOFS, 1); |
2144 | bio->bi_sector = bp->l_blkno << (log->l2bsize - 9); | 2146 | bio->bi_sector = bp->l_blkno << (log->l2bsize - 9); |
2145 | bio->bi_bdev = log->bdev; | 2147 | bio->bi_bdev = log->bdev; |
2146 | bio->bi_io_vec[0].bv_page = bp->l_page; | 2148 | bio->bi_io_vec[0].bv_page = bp->l_page; |
2147 | bio->bi_io_vec[0].bv_len = LOGPSIZE; | 2149 | bio->bi_io_vec[0].bv_len = LOGPSIZE; |
2148 | bio->bi_io_vec[0].bv_offset = bp->l_offset; | 2150 | bio->bi_io_vec[0].bv_offset = bp->l_offset; |
2149 | 2151 | ||
2150 | bio->bi_vcnt = 1; | 2152 | bio->bi_vcnt = 1; |
2151 | bio->bi_idx = 0; | 2153 | bio->bi_idx = 0; |
2152 | bio->bi_size = LOGPSIZE; | 2154 | bio->bi_size = LOGPSIZE; |
2153 | 2155 | ||
2154 | bio->bi_end_io = lbmIODone; | 2156 | bio->bi_end_io = lbmIODone; |
2155 | bio->bi_private = bp; | 2157 | bio->bi_private = bp; |
2156 | 2158 | ||
2157 | /* check if journaling to disk has been disabled */ | 2159 | /* check if journaling to disk has been disabled */ |
2158 | if (log->no_integrity) { | 2160 | if (log->no_integrity) { |
2159 | bio->bi_size = 0; | 2161 | bio->bi_size = 0; |
2160 | lbmIODone(bio, 0, 0); | 2162 | lbmIODone(bio, 0, 0); |
2161 | } else { | 2163 | } else { |
2162 | submit_bio(WRITE_SYNC, bio); | 2164 | submit_bio(WRITE_SYNC, bio); |
2163 | INCREMENT(lmStat.submitted); | 2165 | INCREMENT(lmStat.submitted); |
2164 | } | 2166 | } |
2165 | } | 2167 | } |
2166 | 2168 | ||
2167 | 2169 | ||
2168 | /* | 2170 | /* |
2169 | * lbmIOWait() | 2171 | * lbmIOWait() |
2170 | */ | 2172 | */ |
2171 | static int lbmIOWait(struct lbuf * bp, int flag) | 2173 | static int lbmIOWait(struct lbuf * bp, int flag) |
2172 | { | 2174 | { |
2173 | unsigned long flags; | 2175 | unsigned long flags; |
2174 | int rc = 0; | 2176 | int rc = 0; |
2175 | 2177 | ||
2176 | jfs_info("lbmIOWait1: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag); | 2178 | jfs_info("lbmIOWait1: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag); |
2177 | 2179 | ||
2178 | LCACHE_LOCK(flags); /* disable+lock */ | 2180 | LCACHE_LOCK(flags); /* disable+lock */ |
2179 | 2181 | ||
2180 | LCACHE_SLEEP_COND(bp->l_ioevent, (bp->l_flag & lbmDONE), flags); | 2182 | LCACHE_SLEEP_COND(bp->l_ioevent, (bp->l_flag & lbmDONE), flags); |
2181 | 2183 | ||
2182 | rc = (bp->l_flag & lbmERROR) ? -EIO : 0; | 2184 | rc = (bp->l_flag & lbmERROR) ? -EIO : 0; |
2183 | 2185 | ||
2184 | if (flag & lbmFREE) | 2186 | if (flag & lbmFREE) |
2185 | lbmfree(bp); | 2187 | lbmfree(bp); |
2186 | 2188 | ||
2187 | LCACHE_UNLOCK(flags); /* unlock+enable */ | 2189 | LCACHE_UNLOCK(flags); /* unlock+enable */ |
2188 | 2190 | ||
2189 | jfs_info("lbmIOWait2: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag); | 2191 | jfs_info("lbmIOWait2: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag); |
2190 | return rc; | 2192 | return rc; |
2191 | } | 2193 | } |
2192 | 2194 | ||
2193 | /* | 2195 | /* |
2194 | * lbmIODone() | 2196 | * lbmIODone() |
2195 | * | 2197 | * |
2196 | * executed at INTIODONE level | 2198 | * executed at INTIODONE level |
2197 | */ | 2199 | */ |
2198 | static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error) | 2200 | static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error) |
2199 | { | 2201 | { |
2200 | struct lbuf *bp = bio->bi_private; | 2202 | struct lbuf *bp = bio->bi_private; |
2201 | struct lbuf *nextbp, *tail; | 2203 | struct lbuf *nextbp, *tail; |
2202 | struct jfs_log *log; | 2204 | struct jfs_log *log; |
2203 | unsigned long flags; | 2205 | unsigned long flags; |
2204 | 2206 | ||
2205 | if (bio->bi_size) | 2207 | if (bio->bi_size) |
2206 | return 1; | 2208 | return 1; |
2207 | 2209 | ||
2208 | /* | 2210 | /* |
2209 | * get back jfs buffer bound to the i/o buffer | 2211 | * get back jfs buffer bound to the i/o buffer |
2210 | */ | 2212 | */ |
2211 | jfs_info("lbmIODone: bp:0x%p flag:0x%x", bp, bp->l_flag); | 2213 | jfs_info("lbmIODone: bp:0x%p flag:0x%x", bp, bp->l_flag); |
2212 | 2214 | ||
2213 | LCACHE_LOCK(flags); /* disable+lock */ | 2215 | LCACHE_LOCK(flags); /* disable+lock */ |
2214 | 2216 | ||
2215 | bp->l_flag |= lbmDONE; | 2217 | bp->l_flag |= lbmDONE; |
2216 | 2218 | ||
2217 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { | 2219 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { |
2218 | bp->l_flag |= lbmERROR; | 2220 | bp->l_flag |= lbmERROR; |
2219 | 2221 | ||
2220 | jfs_err("lbmIODone: I/O error in JFS log"); | 2222 | jfs_err("lbmIODone: I/O error in JFS log"); |
2221 | } | 2223 | } |
2222 | 2224 | ||
2223 | bio_put(bio); | 2225 | bio_put(bio); |
2224 | 2226 | ||
2225 | /* | 2227 | /* |
2226 | * pagein completion | 2228 | * pagein completion |
2227 | */ | 2229 | */ |
2228 | if (bp->l_flag & lbmREAD) { | 2230 | if (bp->l_flag & lbmREAD) { |
2229 | bp->l_flag &= ~lbmREAD; | 2231 | bp->l_flag &= ~lbmREAD; |
2230 | 2232 | ||
2231 | LCACHE_UNLOCK(flags); /* unlock+enable */ | 2233 | LCACHE_UNLOCK(flags); /* unlock+enable */ |
2232 | 2234 | ||
2233 | /* wakeup I/O initiator */ | 2235 | /* wakeup I/O initiator */ |
2234 | LCACHE_WAKEUP(&bp->l_ioevent); | 2236 | LCACHE_WAKEUP(&bp->l_ioevent); |
2235 | 2237 | ||
2236 | return 0; | 2238 | return 0; |
2237 | } | 2239 | } |
2238 | 2240 | ||
2239 | /* | 2241 | /* |
2240 | * pageout completion | 2242 | * pageout completion |
2241 | * | 2243 | * |
2242 | * the bp at the head of write queue has completed pageout. | 2244 | * the bp at the head of write queue has completed pageout. |
2243 | * | 2245 | * |
2244 | * if single-commit/full-page pageout, remove the current buffer | 2246 | * if single-commit/full-page pageout, remove the current buffer |
2245 | * from head of pageout queue, and redrive pageout with | 2247 | * from head of pageout queue, and redrive pageout with |
2246 | * the new buffer at head of pageout queue; | 2248 | * the new buffer at head of pageout queue; |
2247 | * otherwise, the partial-page pageout buffer stays at | 2249 | * otherwise, the partial-page pageout buffer stays at |
2248 | * the head of pageout queue to be redriven for pageout | 2250 | * the head of pageout queue to be redriven for pageout |
2249 | * by lmGroupCommit() until full-page pageout is completed. | 2251 | * by lmGroupCommit() until full-page pageout is completed. |
2250 | */ | 2252 | */ |
2251 | bp->l_flag &= ~lbmWRITE; | 2253 | bp->l_flag &= ~lbmWRITE; |
2252 | INCREMENT(lmStat.pagedone); | 2254 | INCREMENT(lmStat.pagedone); |
2253 | 2255 | ||
2254 | /* update committed lsn */ | 2256 | /* update committed lsn */ |
2255 | log = bp->l_log; | 2257 | log = bp->l_log; |
2256 | log->clsn = (bp->l_pn << L2LOGPSIZE) + bp->l_ceor; | 2258 | log->clsn = (bp->l_pn << L2LOGPSIZE) + bp->l_ceor; |
2257 | 2259 | ||
2258 | if (bp->l_flag & lbmDIRECT) { | 2260 | if (bp->l_flag & lbmDIRECT) { |
2259 | LCACHE_WAKEUP(&bp->l_ioevent); | 2261 | LCACHE_WAKEUP(&bp->l_ioevent); |
2260 | LCACHE_UNLOCK(flags); | 2262 | LCACHE_UNLOCK(flags); |
2261 | return 0; | 2263 | return 0; |
2262 | } | 2264 | } |
2263 | 2265 | ||
2264 | tail = log->wqueue; | 2266 | tail = log->wqueue; |
2265 | 2267 | ||
2266 | /* single element queue */ | 2268 | /* single element queue */ |
2267 | if (bp == tail) { | 2269 | if (bp == tail) { |
2268 | /* remove head buffer of full-page pageout | 2270 | /* remove head buffer of full-page pageout |
2269 | * from log device write queue | 2271 | * from log device write queue |
2270 | */ | 2272 | */ |
2271 | if (bp->l_flag & lbmRELEASE) { | 2273 | if (bp->l_flag & lbmRELEASE) { |
2272 | log->wqueue = NULL; | 2274 | log->wqueue = NULL; |
2273 | bp->l_wqnext = NULL; | 2275 | bp->l_wqnext = NULL; |
2274 | } | 2276 | } |
2275 | } | 2277 | } |
2276 | /* multi element queue */ | 2278 | /* multi element queue */ |
2277 | else { | 2279 | else { |
2278 | /* remove head buffer of full-page pageout | 2280 | /* remove head buffer of full-page pageout |
2279 | * from log device write queue | 2281 | * from log device write queue |
2280 | */ | 2282 | */ |
2281 | if (bp->l_flag & lbmRELEASE) { | 2283 | if (bp->l_flag & lbmRELEASE) { |
2282 | nextbp = tail->l_wqnext = bp->l_wqnext; | 2284 | nextbp = tail->l_wqnext = bp->l_wqnext; |
2283 | bp->l_wqnext = NULL; | 2285 | bp->l_wqnext = NULL; |
2284 | 2286 | ||
2285 | /* | 2287 | /* |
2286 | * redrive pageout of next page at head of write queue: | 2288 | * redrive pageout of next page at head of write queue: |
2287 | * redrive next page without any bound tblk | 2289 | * redrive next page without any bound tblk |
2288 | * (i.e., page w/o any COMMIT records), or | 2290 | * (i.e., page w/o any COMMIT records), or |
2289 | * first page of new group commit which has been | 2291 | * first page of new group commit which has been |
2290 | * queued after current page (subsequent pageout | 2292 | * queued after current page (subsequent pageout |
2291 | * is performed synchronously, except page without | 2293 | * is performed synchronously, except page without |
2292 | * any COMMITs) by lmGroupCommit() as indicated | 2294 | * any COMMITs) by lmGroupCommit() as indicated |
2293 | * by lbmWRITE flag; | 2295 | * by lbmWRITE flag; |
2294 | */ | 2296 | */ |
2295 | if (nextbp->l_flag & lbmWRITE) { | 2297 | if (nextbp->l_flag & lbmWRITE) { |
2296 | /* | 2298 | /* |
2297 | * We can't do the I/O at interrupt time. | 2299 | * We can't do the I/O at interrupt time. |
2298 | * The jfsIO thread can do it | 2300 | * The jfsIO thread can do it |
2299 | */ | 2301 | */ |
2300 | lbmRedrive(nextbp); | 2302 | lbmRedrive(nextbp); |
2301 | } | 2303 | } |
2302 | } | 2304 | } |
2303 | } | 2305 | } |
2304 | 2306 | ||
2305 | /* | 2307 | /* |
2306 | * synchronous pageout: | 2308 | * synchronous pageout: |
2307 | * | 2309 | * |
2308 | * buffer has not necessarily been removed from write queue | 2310 | * buffer has not necessarily been removed from write queue |
2309 | * (e.g., synchronous write of partial-page with COMMIT): | 2311 | * (e.g., synchronous write of partial-page with COMMIT): |
2310 | * leave buffer for i/o initiator to dispose | 2312 | * leave buffer for i/o initiator to dispose |
2311 | */ | 2313 | */ |
2312 | if (bp->l_flag & lbmSYNC) { | 2314 | if (bp->l_flag & lbmSYNC) { |
2313 | LCACHE_UNLOCK(flags); /* unlock+enable */ | 2315 | LCACHE_UNLOCK(flags); /* unlock+enable */ |
2314 | 2316 | ||
2315 | /* wakeup I/O initiator */ | 2317 | /* wakeup I/O initiator */ |
2316 | LCACHE_WAKEUP(&bp->l_ioevent); | 2318 | LCACHE_WAKEUP(&bp->l_ioevent); |
2317 | } | 2319 | } |
2318 | 2320 | ||
2319 | /* | 2321 | /* |
2320 | * Group Commit pageout: | 2322 | * Group Commit pageout: |
2321 | */ | 2323 | */ |
2322 | else if (bp->l_flag & lbmGC) { | 2324 | else if (bp->l_flag & lbmGC) { |
2323 | LCACHE_UNLOCK(flags); | 2325 | LCACHE_UNLOCK(flags); |
2324 | lmPostGC(bp); | 2326 | lmPostGC(bp); |
2325 | } | 2327 | } |
2326 | 2328 | ||
2327 | /* | 2329 | /* |
2328 | * asynchronous pageout: | 2330 | * asynchronous pageout: |
2329 | * | 2331 | * |
2330 | * buffer must have been removed from write queue: | 2332 | * buffer must have been removed from write queue: |
2331 | * insert buffer at head of freelist where it can be recycled | 2333 | * insert buffer at head of freelist where it can be recycled |
2332 | */ | 2334 | */ |
2333 | else { | 2335 | else { |
2334 | assert(bp->l_flag & lbmRELEASE); | 2336 | assert(bp->l_flag & lbmRELEASE); |
2335 | assert(bp->l_flag & lbmFREE); | 2337 | assert(bp->l_flag & lbmFREE); |
2336 | lbmfree(bp); | 2338 | lbmfree(bp); |
2337 | 2339 | ||
2338 | LCACHE_UNLOCK(flags); /* unlock+enable */ | 2340 | LCACHE_UNLOCK(flags); /* unlock+enable */ |
2339 | } | 2341 | } |
2340 | 2342 | ||
2341 | return 0; | 2343 | return 0; |
2342 | } | 2344 | } |
2343 | 2345 | ||
2344 | int jfsIOWait(void *arg) | 2346 | int jfsIOWait(void *arg) |
2345 | { | 2347 | { |
2346 | struct lbuf *bp; | 2348 | struct lbuf *bp; |
2347 | 2349 | ||
2348 | daemonize("jfsIO"); | 2350 | daemonize("jfsIO"); |
2349 | 2351 | ||
2350 | complete(&jfsIOwait); | 2352 | complete(&jfsIOwait); |
2351 | 2353 | ||
2352 | do { | 2354 | do { |
2353 | DECLARE_WAITQUEUE(wq, current); | 2355 | DECLARE_WAITQUEUE(wq, current); |
2354 | 2356 | ||
2355 | spin_lock_irq(&log_redrive_lock); | 2357 | spin_lock_irq(&log_redrive_lock); |
2356 | while ((bp = log_redrive_list) != 0) { | 2358 | while ((bp = log_redrive_list) != 0) { |
2357 | log_redrive_list = bp->l_redrive_next; | 2359 | log_redrive_list = bp->l_redrive_next; |
2358 | bp->l_redrive_next = NULL; | 2360 | bp->l_redrive_next = NULL; |
2359 | spin_unlock_irq(&log_redrive_lock); | 2361 | spin_unlock_irq(&log_redrive_lock); |
2360 | lbmStartIO(bp); | 2362 | lbmStartIO(bp); |
2361 | spin_lock_irq(&log_redrive_lock); | 2363 | spin_lock_irq(&log_redrive_lock); |
2362 | } | 2364 | } |
2363 | if (freezing(current)) { | 2365 | if (freezing(current)) { |
2364 | spin_unlock_irq(&log_redrive_lock); | 2366 | spin_unlock_irq(&log_redrive_lock); |
2365 | refrigerator(); | 2367 | refrigerator(); |
2366 | } else { | 2368 | } else { |
2367 | add_wait_queue(&jfs_IO_thread_wait, &wq); | 2369 | add_wait_queue(&jfs_IO_thread_wait, &wq); |
2368 | set_current_state(TASK_INTERRUPTIBLE); | 2370 | set_current_state(TASK_INTERRUPTIBLE); |
2369 | spin_unlock_irq(&log_redrive_lock); | 2371 | spin_unlock_irq(&log_redrive_lock); |
2370 | schedule(); | 2372 | schedule(); |
2371 | current->state = TASK_RUNNING; | 2373 | current->state = TASK_RUNNING; |
2372 | remove_wait_queue(&jfs_IO_thread_wait, &wq); | 2374 | remove_wait_queue(&jfs_IO_thread_wait, &wq); |
2373 | } | 2375 | } |
2374 | } while (!jfs_stop_threads); | 2376 | } while (!jfs_stop_threads); |
2375 | 2377 | ||
2376 | jfs_info("jfsIOWait being killed!"); | 2378 | jfs_info("jfsIOWait being killed!"); |
2377 | complete_and_exit(&jfsIOwait, 0); | 2379 | complete_and_exit(&jfsIOwait, 0); |
2378 | } | 2380 | } |
2379 | 2381 | ||
2380 | /* | 2382 | /* |
2381 | * NAME: lmLogFormat()/jfs_logform() | 2383 | * NAME: lmLogFormat()/jfs_logform() |
2382 | * | 2384 | * |
2383 | * FUNCTION: format file system log | 2385 | * FUNCTION: format file system log |
2384 | * | 2386 | * |
2385 | * PARAMETERS: | 2387 | * PARAMETERS: |
2386 | * log - volume log | 2388 | * log - volume log |
2387 | * logAddress - start address of log space in FS block | 2389 | * logAddress - start address of log space in FS block |
2388 | * logSize - length of log space in FS block; | 2390 | * logSize - length of log space in FS block; |
2389 | * | 2391 | * |
2390 | * RETURN: 0 - success | 2392 | * RETURN: 0 - success |
2391 | * -EIO - i/o error | 2393 | * -EIO - i/o error |
2392 | * | 2394 | * |
2393 | * XXX: We're synchronously writing one page at a time. This needs to | 2395 | * XXX: We're synchronously writing one page at a time. This needs to |
2394 | * be improved by writing multiple pages at once. | 2396 | * be improved by writing multiple pages at once. |
2395 | */ | 2397 | */ |
2396 | int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize) | 2398 | int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize) |
2397 | { | 2399 | { |
2398 | int rc = -EIO; | 2400 | int rc = -EIO; |
2399 | struct jfs_sb_info *sbi; | 2401 | struct jfs_sb_info *sbi; |
2400 | struct logsuper *logsuper; | 2402 | struct logsuper *logsuper; |
2401 | struct logpage *lp; | 2403 | struct logpage *lp; |
2402 | int lspn; /* log sequence page number */ | 2404 | int lspn; /* log sequence page number */ |
2403 | struct lrd *lrd_ptr; | 2405 | struct lrd *lrd_ptr; |
2404 | int npages = 0; | 2406 | int npages = 0; |
2405 | struct lbuf *bp; | 2407 | struct lbuf *bp; |
2406 | 2408 | ||
2407 | jfs_info("lmLogFormat: logAddress:%Ld logSize:%d", | 2409 | jfs_info("lmLogFormat: logAddress:%Ld logSize:%d", |
2408 | (long long)logAddress, logSize); | 2410 | (long long)logAddress, logSize); |
2409 | 2411 | ||
2410 | sbi = list_entry(log->sb_list.next, struct jfs_sb_info, log_list); | 2412 | sbi = list_entry(log->sb_list.next, struct jfs_sb_info, log_list); |
2411 | 2413 | ||
2412 | /* allocate a log buffer */ | 2414 | /* allocate a log buffer */ |
2413 | bp = lbmAllocate(log, 1); | 2415 | bp = lbmAllocate(log, 1); |
2414 | 2416 | ||
2415 | npages = logSize >> sbi->l2nbperpage; | 2417 | npages = logSize >> sbi->l2nbperpage; |
2416 | 2418 | ||
2417 | /* | 2419 | /* |
2418 | * log space: | 2420 | * log space: |
2419 | * | 2421 | * |
2420 | * page 0 - reserved; | 2422 | * page 0 - reserved; |
2421 | * page 1 - log superblock; | 2423 | * page 1 - log superblock; |
2422 | * page 2 - log data page: A SYNC log record is written | 2424 | * page 2 - log data page: A SYNC log record is written |
2423 | * into this page at logform time; | 2425 | * into this page at logform time; |
2424 | * pages 3-N - log data page: set to empty log data pages; | 2426 | * pages 3-N - log data page: set to empty log data pages; |
2425 | */ | 2427 | */ |
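Given that layout, the disk block of log page pn follows directly: each 4K log page spans LOGPSIZE / bsize file system blocks. The code below steps bp->l_blkno one page at a time; a hypothetical helper expressing the same mapping (name and signature are illustrative only):

    /* disk block address of log page pn under the layout above;
     * logAddress is the start of the log in FS blocks and nbperpage
     * is the number of FS blocks per 4K log page (sbi->nbperpage)
     */
    static inline s64 log_page_blkno(s64 logAddress, int nbperpage, int pn)
    {
        return logAddress + (s64) pn * nbperpage;
    }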
2426 | /* | 2428 | /* |
2427 | * init log superblock: log page 1 | 2429 | * init log superblock: log page 1 |
2428 | */ | 2430 | */ |
2429 | logsuper = (struct logsuper *) bp->l_ldata; | 2431 | logsuper = (struct logsuper *) bp->l_ldata; |
2430 | 2432 | ||
2431 | logsuper->magic = cpu_to_le32(LOGMAGIC); | 2433 | logsuper->magic = cpu_to_le32(LOGMAGIC); |
2432 | logsuper->version = cpu_to_le32(LOGVERSION); | 2434 | logsuper->version = cpu_to_le32(LOGVERSION); |
2433 | logsuper->state = cpu_to_le32(LOGREDONE); | 2435 | logsuper->state = cpu_to_le32(LOGREDONE); |
2434 | logsuper->flag = cpu_to_le32(sbi->mntflag); /* ? */ | 2436 | logsuper->flag = cpu_to_le32(sbi->mntflag); /* ? */ |
2435 | logsuper->size = cpu_to_le32(npages); | 2437 | logsuper->size = cpu_to_le32(npages); |
2436 | logsuper->bsize = cpu_to_le32(sbi->bsize); | 2438 | logsuper->bsize = cpu_to_le32(sbi->bsize); |
2437 | logsuper->l2bsize = cpu_to_le32(sbi->l2bsize); | 2439 | logsuper->l2bsize = cpu_to_le32(sbi->l2bsize); |
2438 | logsuper->end = cpu_to_le32(2 * LOGPSIZE + LOGPHDRSIZE + LOGRDSIZE); | 2440 | logsuper->end = cpu_to_le32(2 * LOGPSIZE + LOGPHDRSIZE + LOGRDSIZE); |
2439 | 2441 | ||
2440 | bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT; | 2442 | bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT; |
2441 | bp->l_blkno = logAddress + sbi->nbperpage; | 2443 | bp->l_blkno = logAddress + sbi->nbperpage; |
2442 | lbmStartIO(bp); | 2444 | lbmStartIO(bp); |
2443 | if ((rc = lbmIOWait(bp, 0))) | 2445 | if ((rc = lbmIOWait(bp, 0))) |
2444 | goto exit; | 2446 | goto exit; |
2445 | 2447 | ||
2446 | /* | 2448 | /* |
2447 | * init pages 2 to npages-1 as log data pages: | 2449 | * init pages 2 to npages-1 as log data pages: |
2448 | * | 2450 | * |
2449 | * log page sequence number (lpsn) initialization: | 2451 | * log page sequence number (lpsn) initialization: |
2450 | * | 2452 | * |
2451 | * pn: 0 1 2 3 n-1 | 2453 | * pn: 0 1 2 3 n-1 |
2452 | * +-----+-----+=====+=====+===.....===+=====+ | 2454 | * +-----+-----+=====+=====+===.....===+=====+ |
2453 | * lspn: N-1 0 1 N-2 | 2455 | * lspn: N-1 0 1 N-2 |
2454 | * <--- N page circular file ----> | 2456 | * <--- N page circular file ----> |
2455 | * | 2457 | * |
2456 | * the N (= npages-2) data pages of the log are maintained as | 2458 | * the N (= npages-2) data pages of the log are maintained as |
2457 | * a circular file for the log records; | 2459 | * a circular file for the log records; |
2458 | * lpsn grows by 1 monotonically as each log page is written | 2460 | * lpsn grows by 1 monotonically as each log page is written |
2459 | * to the circular file of the log; | 2461 | * to the circular file of the log; |
2460 | * and setLogpage() will not reset the page number even if | 2462 | * and setLogpage() will not reset the page number even if |
2461 | * the eor is equal to LOGPHDRSIZE. In order for binary search to | 2463 | * the eor is equal to LOGPHDRSIZE. In order for binary search to |
2462 | * still work in the find-log-end process, we have to simulate the | 2464 | * still work in the find-log-end process, we have to simulate the |
2463 | * log wrap situation at log format time. | 2465 | * log wrap situation at log format time. |
2464 | * The 1st log page written will have the highest lpsn. Then | 2466 | * The 1st log page written will have the highest lpsn. Then |
2465 | * the succeeding log pages will have ascending order of | 2467 | * the succeeding log pages will have ascending order of |
2466 | * the lspn starting from 0, ... (N-2) | 2468 | * the lspn starting from 0, ... (N-2) |
2467 | */ | 2469 | */ |
2468 | lp = (struct logpage *) bp->l_ldata; | 2470 | lp = (struct logpage *) bp->l_ldata; |
2469 | /* | 2471 | /* |
2470 | * initialize 1st log page to be written: lpsn = N - 1, | 2472 | * initialize 1st log page to be written: lpsn = N - 1, |
2471 | * a SYNCPT log record is written to this page | 2473 | * a SYNCPT log record is written to this page |
2472 | */ | 2474 | */ |
2473 | lp->h.page = lp->t.page = cpu_to_le32(npages - 3); | 2475 | lp->h.page = lp->t.page = cpu_to_le32(npages - 3); |
2474 | lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE + LOGRDSIZE); | 2476 | lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE + LOGRDSIZE); |
2475 | 2477 | ||
2476 | lrd_ptr = (struct lrd *) &lp->data; | 2478 | lrd_ptr = (struct lrd *) &lp->data; |
2477 | lrd_ptr->logtid = 0; | 2479 | lrd_ptr->logtid = 0; |
2478 | lrd_ptr->backchain = 0; | 2480 | lrd_ptr->backchain = 0; |
2479 | lrd_ptr->type = cpu_to_le16(LOG_SYNCPT); | 2481 | lrd_ptr->type = cpu_to_le16(LOG_SYNCPT); |
2480 | lrd_ptr->length = 0; | 2482 | lrd_ptr->length = 0; |
2481 | lrd_ptr->log.syncpt.sync = 0; | 2483 | lrd_ptr->log.syncpt.sync = 0; |
2482 | 2484 | ||
2483 | bp->l_blkno += sbi->nbperpage; | 2485 | bp->l_blkno += sbi->nbperpage; |
2484 | bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT; | 2486 | bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT; |
2485 | lbmStartIO(bp); | 2487 | lbmStartIO(bp); |
2486 | if ((rc = lbmIOWait(bp, 0))) | 2488 | if ((rc = lbmIOWait(bp, 0))) |
2487 | goto exit; | 2489 | goto exit; |
2488 | 2490 | ||
2489 | /* | 2491 | /* |
2490 | * initialize succeeding log pages: lpsn = 0, 1, ..., (N-2) | 2492 | * initialize succeeding log pages: lpsn = 0, 1, ..., (N-2) |
2491 | */ | 2493 | */ |
2492 | for (lspn = 0; lspn < npages - 3; lspn++) { | 2494 | for (lspn = 0; lspn < npages - 3; lspn++) { |
2493 | lp->h.page = lp->t.page = cpu_to_le32(lspn); | 2495 | lp->h.page = lp->t.page = cpu_to_le32(lspn); |
2494 | lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE); | 2496 | lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE); |
2495 | 2497 | ||
2496 | bp->l_blkno += sbi->nbperpage; | 2498 | bp->l_blkno += sbi->nbperpage; |
2497 | bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT; | 2499 | bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT; |
2498 | lbmStartIO(bp); | 2500 | lbmStartIO(bp); |
2499 | if ((rc = lbmIOWait(bp, 0))) | 2501 | if ((rc = lbmIOWait(bp, 0))) |
2500 | goto exit; | 2502 | goto exit; |
2501 | } | 2503 | } |
2502 | 2504 | ||
2503 | rc = 0; | 2505 | rc = 0; |
2504 | exit: | 2506 | exit: |
2505 | /* | 2507 | /* |
2506 | * finalize log | 2508 | * finalize log |
2507 | */ | 2509 | */ |
2508 | /* release the buffer */ | 2510 | /* release the buffer */ |
2509 | lbmFree(bp); | 2511 | lbmFree(bp); |
2510 | 2512 | ||
2511 | return rc; | 2513 | return rc; |
2512 | } | 2514 | } |
2513 | 2515 | ||
2514 | #ifdef CONFIG_JFS_STATISTICS | 2516 | #ifdef CONFIG_JFS_STATISTICS |
2515 | int jfs_lmstats_read(char *buffer, char **start, off_t offset, int length, | 2517 | int jfs_lmstats_read(char *buffer, char **start, off_t offset, int length, |
2516 | int *eof, void *data) | 2518 | int *eof, void *data) |
2517 | { | 2519 | { |
2518 | int len = 0; | 2520 | int len = 0; |
2519 | off_t begin; | 2521 | off_t begin; |
2520 | 2522 | ||
2521 | len += sprintf(buffer, | 2523 | len += sprintf(buffer, |
2522 | "JFS Logmgr stats\n" | 2524 | "JFS Logmgr stats\n" |
2523 | "================\n" | 2525 | "================\n" |
2524 | "commits = %d\n" | 2526 | "commits = %d\n" |
2525 | "writes submitted = %d\n" | 2527 | "writes submitted = %d\n" |
2526 | "writes completed = %d\n" | 2528 | "writes completed = %d\n" |
2527 | "full pages submitted = %d\n" | 2529 | "full pages submitted = %d\n" |
2528 | "partial pages submitted = %d\n", | 2530 | "partial pages submitted = %d\n", |
2529 | lmStat.commit, | 2531 | lmStat.commit, |
2530 | lmStat.submitted, | 2532 | lmStat.submitted, |
2531 | lmStat.pagedone, | 2533 | lmStat.pagedone, |
2532 | lmStat.full_page, | 2534 | lmStat.full_page, |
2533 | lmStat.partial_page); | 2535 | lmStat.partial_page); |
2534 | 2536 | ||
2535 | begin = offset; | 2537 | begin = offset; |
2536 | *start = buffer + begin; | 2538 | *start = buffer + begin; |
2537 | len -= begin; | 2539 | len -= begin; |
2538 | 2540 | ||
2539 | if (len > length) | 2541 | if (len > length) |
2540 | len = length; | 2542 | len = length; |
2541 | else | 2543 | else |
2542 | *eof = 1; | 2544 | *eof = 1; |
2543 | 2545 | ||
2544 | if (len < 0) | 2546 | if (len < 0) |
fs/jfs/jfs_logmgr.h
1 | /* | 1 | /* |
2 | * Copyright (C) International Business Machines Corp., 2000-2004 | 2 | * Copyright (C) International Business Machines Corp., 2000-2004 |
3 | * Portions Copyright (C) Christoph Hellwig, 2001-2002 | 3 | * Portions Copyright (C) Christoph Hellwig, 2001-2002 |
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by | 6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or | 7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. | 8 | * (at your option) any later version. |
9 | * | 9 | * |
10 | * This program is distributed in the hope that it will be useful, | 10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
13 | * the GNU General Public License for more details. | 13 | * the GNU General Public License for more details. |
14 | * | 14 | * |
15 | * You should have received a copy of the GNU General Public License | 15 | * You should have received a copy of the GNU General Public License |
16 | * along with this program; if not, write to the Free Software | 16 | * along with this program; if not, write to the Free Software |
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
18 | */ | 18 | */ |
19 | #ifndef _H_JFS_LOGMGR | 19 | #ifndef _H_JFS_LOGMGR |
20 | #define _H_JFS_LOGMGR | 20 | #define _H_JFS_LOGMGR |
21 | 21 | ||
22 | #include "jfs_filsys.h" | 22 | #include "jfs_filsys.h" |
23 | #include "jfs_lock.h" | 23 | #include "jfs_lock.h" |
24 | 24 | ||
25 | /* | 25 | /* |
26 | * log manager configuration parameters | 26 | * log manager configuration parameters |
27 | */ | 27 | */ |
28 | 28 | ||
29 | /* log page size */ | 29 | /* log page size */ |
30 | #define LOGPSIZE 4096 | 30 | #define LOGPSIZE 4096 |
31 | #define L2LOGPSIZE 12 | 31 | #define L2LOGPSIZE 12 |
32 | 32 | ||
33 | #define LOGPAGES 16 /* Log pages per mounted file system */ | 33 | #define LOGPAGES 16 /* Log pages per mounted file system */ |
34 | 34 | ||
35 | /* | 35 | /* |
36 | * log logical volume | 36 | * log logical volume |
37 | * | 37 | * |
38 | * a log is used to make the commit operation on journalled | 38 | * a log is used to make the commit operation on journalled |
39 | * files within the same logical volume group atomic. | 39 | * files within the same logical volume group atomic. |
40 | * a log is implemented with a logical volume. | 40 | * a log is implemented with a logical volume. |
41 | * there is one log per logical volume group. | 41 | * there is one log per logical volume group. |
42 | * | 42 | * |
43 | * block 0 of the log logical volume is not used (ipl etc). | 43 | * block 0 of the log logical volume is not used (ipl etc). |
44 | * block 1 contains a log "superblock" and is used by logFormat(), | 44 | * block 1 contains a log "superblock" and is used by logFormat(), |
45 | * lmLogInit(), lmLogShutdown(), and logRedo() to record status | 45 | * lmLogInit(), lmLogShutdown(), and logRedo() to record status |
46 | * of the log but is not otherwise used during normal processing. | 46 | * of the log but is not otherwise used during normal processing. |
47 | * blocks 2 - (N-1) are used to contain log records. | 47 | * blocks 2 - (N-1) are used to contain log records. |
48 | * | 48 | * |
49 | * when a volume group is varied-on-line, logRedo() must have | 49 | * when a volume group is varied-on-line, logRedo() must have |
50 | * been executed before the file systems (logical volumes) in | 50 | * been executed before the file systems (logical volumes) in |
51 | * the volume group can be mounted. | 51 | * the volume group can be mounted. |
52 | */ | 52 | */ |
53 | /* | 53 | /* |
54 | * log superblock (block 1 of logical volume) | 54 | * log superblock (block 1 of logical volume) |
55 | */ | 55 | */ |
56 | #define LOGSUPER_B 1 | 56 | #define LOGSUPER_B 1 |
57 | #define LOGSTART_B 2 | 57 | #define LOGSTART_B 2 |
58 | 58 | ||
59 | #define LOGMAGIC 0x87654321 | 59 | #define LOGMAGIC 0x87654321 |
60 | #define LOGVERSION 1 | 60 | #define LOGVERSION 1 |
61 | 61 | ||
62 | #define MAX_ACTIVE 128 /* Max active file systems sharing log */ | 62 | #define MAX_ACTIVE 128 /* Max active file systems sharing log */ |
63 | 63 | ||
64 | struct logsuper { | 64 | struct logsuper { |
65 | __le32 magic; /* 4: log lv identifier */ | 65 | __le32 magic; /* 4: log lv identifier */ |
66 | __le32 version; /* 4: version number */ | 66 | __le32 version; /* 4: version number */ |
67 | __le32 serial; /* 4: log open/mount counter */ | 67 | __le32 serial; /* 4: log open/mount counter */ |
68 | __le32 size; /* 4: size in number of LOGPSIZE blocks */ | 68 | __le32 size; /* 4: size in number of LOGPSIZE blocks */ |
69 | __le32 bsize; /* 4: logical block size in byte */ | 69 | __le32 bsize; /* 4: logical block size in byte */ |
70 | __le32 l2bsize; /* 4: log2 of bsize */ | 70 | __le32 l2bsize; /* 4: log2 of bsize */ |
71 | 71 | ||
72 | __le32 flag; /* 4: option */ | 72 | __le32 flag; /* 4: option */ |
73 | __le32 state; /* 4: state - see below */ | 73 | __le32 state; /* 4: state - see below */ |
74 | 74 | ||
75 | __le32 end; /* 4: addr of last log record set by logredo */ | 75 | __le32 end; /* 4: addr of last log record set by logredo */ |
76 | char uuid[16]; /* 16: 128-bit journal uuid */ | 76 | char uuid[16]; /* 16: 128-bit journal uuid */ |
77 | char label[16]; /* 16: journal label */ | 77 | char label[16]; /* 16: journal label */ |
78 | struct { | 78 | struct { |
79 | char uuid[16]; | 79 | char uuid[16]; |
80 | } active[MAX_ACTIVE]; /* 2048: active file systems list */ | 80 | } active[MAX_ACTIVE]; /* 2048: active file systems list */ |
81 | }; | 81 | }; |
82 | 82 | ||
83 | #define NULL_UUID "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" | 83 | #define NULL_UUID "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" |
84 | 84 | ||
85 | /* log flag: commit option (see jfs_filsys.h) */ | 85 | /* log flag: commit option (see jfs_filsys.h) */ |
86 | 86 | ||
87 | /* log state */ | 87 | /* log state */ |
88 | #define LOGMOUNT 0 /* log mounted by lmLogInit() */ | 88 | #define LOGMOUNT 0 /* log mounted by lmLogInit() */ |
89 | #define LOGREDONE 1 /* log shutdown by lmLogShutdown(). | 89 | #define LOGREDONE 1 /* log shutdown by lmLogShutdown(). |
90 | * log redo completed by logredo(). | 90 | * log redo completed by logredo(). |
91 | */ | 91 | */ |
92 | #define LOGWRAP 2 /* log wrapped */ | 92 | #define LOGWRAP 2 /* log wrapped */ |
93 | #define LOGREADERR 3 /* log read error detected in logredo() */ | 93 | #define LOGREADERR 3 /* log read error detected in logredo() */ |
94 | 94 | ||
95 | 95 | ||
96 | /* | 96 | /* |
97 | * log logical page | 97 | * log logical page |
98 | * | 98 | * |
99 | * (this comment should be rewritten !) | 99 | * (this comment should be rewritten !) |
100 | * the header and trailer structures (h,t) will normally have | 100 | * the header and trailer structures (h,t) will normally have |
101 | * the same page and eor value. | 101 | * the same page and eor value. |
102 | * An exception to this occurs when a complete page write is not | 102 | * An exception to this occurs when a complete page write is not |
103 | * accomplished on a power failure. Since the hardware may "split write" | 103 | * accomplished on a power failure. Since the hardware may "split write" |
104 | * sectors in the page, any out-of-order sequence may occur during a power failure | 104 | * sectors in the page, any out-of-order sequence may occur during a power failure |
105 | * and needs to be recognized during log replay. The xor value is | 105 | * and needs to be recognized during log replay. The xor value is |
106 | * an "exclusive or" of all log words in the page up to eor. This | 106 | * an "exclusive or" of all log words in the page up to eor. This |
107 | * 32 bit eor is stored with the top 16 bits in the header and the | 107 | * 32 bit eor is stored with the top 16 bits in the header and the |
108 | * bottom 16 bits in the trailer. logredo can easily recognize pages | 108 | * bottom 16 bits in the trailer. logredo can easily recognize pages |
109 | * that were not completed by reconstructing this eor and checking | 109 | * that were not completed by reconstructing this eor and checking |
110 | * the log page. | 110 | * the log page. |
111 | * | 111 | * |
112 | * Previous versions of the operating system did not allow split | 112 | * Previous versions of the operating system did not allow split |
113 | * writes and detected partially written records in logredo by | 113 | * writes and detected partially written records in logredo by |
114 | * ordering the updates to the header, trailer, and the move of data | 114 | * ordering the updates to the header, trailer, and the move of data |
115 | * into the logdata area. The order: (1) data is moved (2) header | 115 | * into the logdata area. The order: (1) data is moved (2) header |
116 | * is updated (3) trailer is updated. In logredo, when the header | 116 | * is updated (3) trailer is updated. In logredo, when the header |
117 | * differed from the trailer, the header and trailer were reconciled | 117 | * differed from the trailer, the header and trailer were reconciled |
118 | * as follows: if h.page != t.page they were set to the smaller of | 118 | * as follows: if h.page != t.page they were set to the smaller of |
119 | * the two and h.eor and t.eor set to 8 (i.e. empty page). if (only) | 119 | * the two and h.eor and t.eor set to 8 (i.e. empty page). if (only) |
120 | * h.eor != t.eor they were set to the smaller of their two values. | 120 | * h.eor != t.eor they were set to the smaller of their two values. |
121 | */ | 121 | */ |
122 | struct logpage { | 122 | struct logpage { |
123 | struct { /* header */ | 123 | struct { /* header */ |
124 | __le32 page; /* 4: log sequence page number */ | 124 | __le32 page; /* 4: log sequence page number */ |
125 | __le16 rsrvd; /* 2: */ | 125 | __le16 rsrvd; /* 2: */ |
126 | __le16 eor; /* 2: end-of-log offset of last record write */ | 126 | __le16 eor; /* 2: end-of-log offset of last record write */ |
127 | } h; | 127 | } h; |
128 | 128 | ||
129 | __le32 data[LOGPSIZE / 4 - 4]; /* log record area */ | 129 | __le32 data[LOGPSIZE / 4 - 4]; /* log record area */ |
130 | 130 | ||
131 | struct { /* trailer */ | 131 | struct { /* trailer */ |
132 | __le32 page; /* 4: normally the same as h.page */ | 132 | __le32 page; /* 4: normally the same as h.page */ |
133 | __le16 rsrvd; /* 2: */ | 133 | __le16 rsrvd; /* 2: */ |
134 | __le16 eor; /* 2: normally the same as h.eor */ | 134 | __le16 eor; /* 2: normally the same as h.eor */ |
135 | } t; | 135 | } t; |
136 | }; | 136 | }; |
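The reconciliation rule quoted above is mechanical enough to sketch. Assuming the legacy behaviour as described (the smaller page wins and the page is treated as empty; otherwise the smaller eor wins), a logredo-style repair could look like this — illustrative only, since the real recovery logic lives in logredo, not in this header:

    /* reconcile a header/trailer mismatch, per the comment above:
     * h.page != t.page -> take the smaller page, mark the page empty (eor = 8)
     * h.eor  != t.eor  -> take the smaller eor
     */
    static void logpage_reconcile(struct logpage *lp)
    {
        u32 page = min(le32_to_cpu(lp->h.page), le32_to_cpu(lp->t.page));
        u16 eor = min(le16_to_cpu(lp->h.eor), le16_to_cpu(lp->t.eor));

        if (lp->h.page != lp->t.page)
            eor = 8;                    /* LOGPHDRSIZE: empty page */

        lp->h.page = lp->t.page = cpu_to_le32(page);
        lp->h.eor = lp->t.eor = cpu_to_le16(eor);
    }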
137 | 137 | ||
138 | #define LOGPHDRSIZE 8 /* log page header size */ | 138 | #define LOGPHDRSIZE 8 /* log page header size */ |
139 | #define LOGPTLRSIZE 8 /* log page trailer size */ | 139 | #define LOGPTLRSIZE 8 /* log page trailer size */ |
140 | 140 | ||
141 | 141 | ||
142 | /* | 142 | /* |
143 | * log record | 143 | * log record |
144 | * | 144 | * |
145 | * (this comment should be rewritten !) | 145 | * (this comment should be rewritten !) |
146 | * jfs uses only "after" log records (only a single writer is allowed | 146 | * jfs uses only "after" log records (only a single writer is allowed |
147 | * in a page, pages are written to temporary paging space | 147 | * in a page, pages are written to temporary paging space |
148 | * if they must be written to disk before commit, and i/o is | 148 | * if they must be written to disk before commit, and i/o is |
149 | * scheduled for modified pages to their home location after | 149 | * scheduled for modified pages to their home location after |
150 | * the log records containing the after values and the commit | 150 | * the log records containing the after values and the commit |
151 | * record is written to the log on disk, undo discards the copy | 151 | * record is written to the log on disk, undo discards the copy |
152 | * in main-memory.) | 152 | * in main-memory.) |
153 | * | 153 | * |
154 | * a log record consists of a data area of variable length followed by | 154 | * a log record consists of a data area of variable length followed by |
155 | * a descriptor of fixed size LOGRDSIZE bytes. | 155 | * a descriptor of fixed size LOGRDSIZE bytes. |
156 | * the data area is rounded up to an integral number of 4-byte words and | 156 | * the data area is rounded up to an integral number of 4-byte words and |
157 | * must be no longer than LOGPSIZE. | 157 | * must be no longer than LOGPSIZE. |
158 | * the descriptor's size is a multiple of 4 bytes and it is aligned on a | 158 | * the descriptor's size is a multiple of 4 bytes and it is aligned on a |
159 | * 4-byte boundary. | 159 | * 4-byte boundary. |
160 | * records are packed one after the other in the data area of log pages. | 160 | * records are packed one after the other in the data area of log pages. |
161 | * (sometimes a DUMMY record is inserted so that at least one record ends | 161 | * (sometimes a DUMMY record is inserted so that at least one record ends |
162 | * on every page or the longest record is placed on at most two pages). | 162 | * on every page or the longest record is placed on at most two pages). |
163 | * the field eor in page header/trailer points to the byte following | 163 | * the field eor in page header/trailer points to the byte following |
164 | * the last record on a page. | 164 | * the last record on a page. |
165 | */ | 165 | */ |
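As a rough illustration of the layout described above (a sketch, not part of this patch): appending a record advances eor by the data area rounded up to 4 bytes plus the fixed-size descriptor, using the LOGRDSIZE definition from later in this header.

	/* Hypothetical sketch: offset of the byte following a record
	 * appended at the current eor; lrd_next_eor is not a real JFS
	 * function. */
	static int lrd_next_eor(int eor, int datalen)
	{
		int data = (datalen + 3) & ~3;	/* data rounded up to 4 bytes */
		return eor + data + LOGRDSIZE;	/* descriptor follows the data */
	}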
166 | 166 | ||
167 | /* log record types */ | 167 | /* log record types */ |
168 | #define LOG_COMMIT 0x8000 | 168 | #define LOG_COMMIT 0x8000 |
169 | #define LOG_SYNCPT 0x4000 | 169 | #define LOG_SYNCPT 0x4000 |
170 | #define LOG_MOUNT 0x2000 | 170 | #define LOG_MOUNT 0x2000 |
171 | #define LOG_REDOPAGE 0x0800 | 171 | #define LOG_REDOPAGE 0x0800 |
172 | #define LOG_NOREDOPAGE 0x0080 | 172 | #define LOG_NOREDOPAGE 0x0080 |
173 | #define LOG_NOREDOINOEXT 0x0040 | 173 | #define LOG_NOREDOINOEXT 0x0040 |
174 | #define LOG_UPDATEMAP 0x0008 | 174 | #define LOG_UPDATEMAP 0x0008 |
175 | #define LOG_NOREDOFILE 0x0001 | 175 | #define LOG_NOREDOFILE 0x0001 |
176 | 176 | ||
177 | /* REDOPAGE/NOREDOPAGE log record data type */ | 177 | /* REDOPAGE/NOREDOPAGE log record data type */ |
178 | #define LOG_INODE 0x0001 | 178 | #define LOG_INODE 0x0001 |
179 | #define LOG_XTREE 0x0002 | 179 | #define LOG_XTREE 0x0002 |
180 | #define LOG_DTREE 0x0004 | 180 | #define LOG_DTREE 0x0004 |
181 | #define LOG_BTROOT 0x0010 | 181 | #define LOG_BTROOT 0x0010 |
182 | #define LOG_EA 0x0020 | 182 | #define LOG_EA 0x0020 |
183 | #define LOG_ACL 0x0040 | 183 | #define LOG_ACL 0x0040 |
184 | #define LOG_DATA 0x0080 | 184 | #define LOG_DATA 0x0080 |
185 | #define LOG_NEW 0x0100 | 185 | #define LOG_NEW 0x0100 |
186 | #define LOG_EXTEND 0x0200 | 186 | #define LOG_EXTEND 0x0200 |
187 | #define LOG_RELOCATE 0x0400 | 187 | #define LOG_RELOCATE 0x0400 |
188 | #define LOG_DIR_XTREE 0x0800 /* Xtree is in directory inode */ | 188 | #define LOG_DIR_XTREE 0x0800 /* Xtree is in directory inode */ |
189 | 189 | ||
190 | /* UPDATEMAP log record descriptor type */ | 190 | /* UPDATEMAP log record descriptor type */ |
191 | #define LOG_ALLOCXADLIST 0x0080 | 191 | #define LOG_ALLOCXADLIST 0x0080 |
192 | #define LOG_ALLOCPXDLIST 0x0040 | 192 | #define LOG_ALLOCPXDLIST 0x0040 |
193 | #define LOG_ALLOCXAD 0x0020 | 193 | #define LOG_ALLOCXAD 0x0020 |
194 | #define LOG_ALLOCPXD 0x0010 | 194 | #define LOG_ALLOCPXD 0x0010 |
195 | #define LOG_FREEXADLIST 0x0008 | 195 | #define LOG_FREEXADLIST 0x0008 |
196 | #define LOG_FREEPXDLIST 0x0004 | 196 | #define LOG_FREEPXDLIST 0x0004 |
197 | #define LOG_FREEXAD 0x0002 | 197 | #define LOG_FREEXAD 0x0002 |
198 | #define LOG_FREEPXD 0x0001 | 198 | #define LOG_FREEPXD 0x0001 |
199 | 199 | ||
200 | 200 | ||
201 | struct lrd { | 201 | struct lrd { |
202 | /* | 202 | /* |
203 | * type independent area | 203 | * type independent area |
204 | */ | 204 | */ |
205 | __le32 logtid; /* 4: log transaction identifier */ | 205 | __le32 logtid; /* 4: log transaction identifier */ |
206 | __le32 backchain; /* 4: ptr to prev record of same transaction */ | 206 | __le32 backchain; /* 4: ptr to prev record of same transaction */ |
207 | __le16 type; /* 2: record type */ | 207 | __le16 type; /* 2: record type */ |
208 | __le16 length; /* 2: length of data in record (in byte) */ | 208 | __le16 length; /* 2: length of data in record (in byte) */ |
209 | __le32 aggregate; /* 4: file system lv/aggregate */ | 209 | __le32 aggregate; /* 4: file system lv/aggregate */ |
210 | /* (16) */ | 210 | /* (16) */ |
211 | 211 | ||
212 | /* | 212 | /* |
213 | * type dependent area (20) | 213 | * type dependent area (20) |
214 | */ | 214 | */ |
215 | union { | 215 | union { |
216 | 216 | ||
217 | /* | 217 | /* |
218 | * COMMIT: commit | 218 | * COMMIT: commit |
219 | * | 219 | * |
220 | * transaction commit: no type-dependent information; | 220 | * transaction commit: no type-dependent information; |
221 | */ | 221 | */ |
222 | 222 | ||
223 | /* | 223 | /* |
224 | * REDOPAGE: after-image | 224 | * REDOPAGE: after-image |
225 | * | 225 | * |
226 | * apply after-image; | 226 | * apply after-image; |
227 | * | 227 | * |
228 | * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format; | 228 | * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format; |
229 | */ | 229 | */ |
230 | struct { | 230 | struct { |
231 | __le32 fileset; /* 4: fileset number */ | 231 | __le32 fileset; /* 4: fileset number */ |
232 | __le32 inode; /* 4: inode number */ | 232 | __le32 inode; /* 4: inode number */ |
233 | __le16 type; /* 2: REDOPAGE record type */ | 233 | __le16 type; /* 2: REDOPAGE record type */ |
234 | __le16 l2linesize; /* 2: log2 of line size */ | 234 | __le16 l2linesize; /* 2: log2 of line size */ |
235 | pxd_t pxd; /* 8: on-disk page pxd */ | 235 | pxd_t pxd; /* 8: on-disk page pxd */ |
236 | } redopage; /* (20) */ | 236 | } redopage; /* (20) */ |
237 | 237 | ||
238 | /* | 238 | /* |
239 | * NOREDOPAGE: the page is freed | 239 | * NOREDOPAGE: the page is freed |
240 | * | 240 | * |
241 | * do not apply after-image records which precede this record | 241 | * do not apply after-image records which precede this record |
242 | * in the log with the same page block number to this page. | 242 | * in the log with the same page block number to this page. |
243 | * | 243 | * |
244 | * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format; | 244 | * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format; |
245 | */ | 245 | */ |
246 | struct { | 246 | struct { |
247 | __le32 fileset; /* 4: fileset number */ | 247 | __le32 fileset; /* 4: fileset number */ |
248 | __le32 inode; /* 4: inode number */ | 248 | __le32 inode; /* 4: inode number */ |
249 | __le16 type; /* 2: NOREDOPAGE record type */ | 249 | __le16 type; /* 2: NOREDOPAGE record type */ |
250 | __le16 rsrvd; /* 2: reserved */ | 250 | __le16 rsrvd; /* 2: reserved */ |
251 | pxd_t pxd; /* 8: on-disk page pxd */ | 251 | pxd_t pxd; /* 8: on-disk page pxd */ |
252 | } noredopage; /* (20) */ | 252 | } noredopage; /* (20) */ |
253 | 253 | ||
254 | /* | 254 | /* |
255 | * UPDATEMAP: update block allocation map | 255 | * UPDATEMAP: update block allocation map |
256 | * | 256 | * |
257 | * either in-line PXD, | 257 | * either in-line PXD, |
258 | * or out-of-line XADLIST; | 258 | * or out-of-line XADLIST; |
259 | * | 259 | * |
260 | * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format; | 260 | * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format; |
261 | */ | 261 | */ |
262 | struct { | 262 | struct { |
263 | __le32 fileset; /* 4: fileset number */ | 263 | __le32 fileset; /* 4: fileset number */ |
264 | __le32 inode; /* 4: inode number */ | 264 | __le32 inode; /* 4: inode number */ |
265 | __le16 type; /* 2: UPDATEMAP record type */ | 265 | __le16 type; /* 2: UPDATEMAP record type */ |
266 | __le16 nxd; /* 2: number of extents */ | 266 | __le16 nxd; /* 2: number of extents */ |
267 | pxd_t pxd; /* 8: pxd */ | 267 | pxd_t pxd; /* 8: pxd */ |
268 | } updatemap; /* (20) */ | 268 | } updatemap; /* (20) */ |
269 | 269 | ||
270 | /* | 270 | /* |
271 | * NOREDOINOEXT: the inode extent is freed | 271 | * NOREDOINOEXT: the inode extent is freed |
272 | * | 272 | * |
273 | * do not apply after-image records which precede this | 273 | * do not apply after-image records which precede this |
274 | * record in the log with any of the 4 page block | 274 | * record in the log with any of the 4 page block |
275 | * numbers in this inode extent. | 275 | * numbers in this inode extent. |
276 | * | 276 | * |
277 | * NOTE: The fileset and pxd fields MUST remain in | 277 | * NOTE: The fileset and pxd fields MUST remain in |
278 | * the same fields in the REDOPAGE record format. | 278 | * the same fields in the REDOPAGE record format. |
279 | * | 279 | * |
280 | */ | 280 | */ |
281 | struct { | 281 | struct { |
282 | __le32 fileset; /* 4: fileset number */ | 282 | __le32 fileset; /* 4: fileset number */ |
283 | __le32 iagnum; /* 4: IAG number */ | 283 | __le32 iagnum; /* 4: IAG number */ |
284 | __le32 inoext_idx; /* 4: inode extent index */ | 284 | __le32 inoext_idx; /* 4: inode extent index */ |
285 | pxd_t pxd; /* 8: on-disk page pxd */ | 285 | pxd_t pxd; /* 8: on-disk page pxd */ |
286 | } noredoinoext; /* (20) */ | 286 | } noredoinoext; /* (20) */ |
287 | 287 | ||
288 | /* | 288 | /* |
289 | * SYNCPT: log sync point | 289 | * SYNCPT: log sync point |
290 | * | 290 | * |
291 | * replay log up to the syncpt address specified; | 291 | * replay log up to the syncpt address specified; |
292 | */ | 292 | */ |
293 | struct { | 293 | struct { |
294 | __le32 sync; /* 4: syncpt address (0 = here) */ | 294 | __le32 sync; /* 4: syncpt address (0 = here) */ |
295 | } syncpt; | 295 | } syncpt; |
296 | 296 | ||
297 | /* | 297 | /* |
298 | * MOUNT: file system mount | 298 | * MOUNT: file system mount |
299 | * | 299 | * |
300 | * file system mount: no type-dependent information; | 300 | * file system mount: no type-dependent information; |
301 | */ | 301 | */ |
302 | 302 | ||
303 | /* | 303 | /* |
304 | * ? FREEXTENT: free specified extent(s) | 304 | * ? FREEXTENT: free specified extent(s) |
305 | * | 305 | * |
306 | * free specified extent(s) from block allocation map | 306 | * free specified extent(s) from block allocation map |
307 | * N.B.: nextents should be length of data/sizeof(xad_t) | 307 | * N.B.: nextents should be length of data/sizeof(xad_t) |
308 | */ | 308 | */ |
309 | struct { | 309 | struct { |
310 | __le32 type; /* 4: FREEXTENT record type */ | 310 | __le32 type; /* 4: FREEXTENT record type */ |
311 | __le32 nextent; /* 4: number of extents */ | 311 | __le32 nextent; /* 4: number of extents */ |
312 | 312 | ||
313 | /* data: PXD or XAD list */ | 313 | /* data: PXD or XAD list */ |
314 | } freextent; | 314 | } freextent; |
315 | 315 | ||
316 | /* | 316 | /* |
317 | * ? NOREDOFILE: this file is freed | 317 | * ? NOREDOFILE: this file is freed |
318 | * | 318 | * |
319 | * do not apply records which precede this record in the log | 319 | * do not apply records which precede this record in the log |
320 | * with the same inode number. | 320 | * with the same inode number. |
321 | * | 321 | * |
322 | * NOREDOFILE must be the first to be written at commit | 322 | * NOREDOFILE must be the first to be written at commit |
323 | * (last to be read in logredo()) - it prevents | 323 | * (last to be read in logredo()) - it prevents |
324 | * replay of preceding updates of all preceding generations | 324 | * replay of preceding updates of all preceding generations |
325 | * of the inumber esp. the on-disk inode itself, | 325 | * of the inumber esp. the on-disk inode itself, |
326 | * but does NOT prevent | 326 | * but does NOT prevent |
327 | * replay of the | 327 | * replay of the |
328 | */ | 328 | */ |
329 | struct { | 329 | struct { |
330 | __le32 fileset; /* 4: fileset number */ | 330 | __le32 fileset; /* 4: fileset number */ |
331 | __le32 inode; /* 4: inode number */ | 331 | __le32 inode; /* 4: inode number */ |
332 | } noredofile; | 332 | } noredofile; |
333 | 333 | ||
334 | /* | 334 | /* |
335 | * ? NEWPAGE: | 335 | * ? NEWPAGE: |
336 | * | 336 | * |
337 | * metadata type dependent | 337 | * metadata type dependent |
338 | */ | 338 | */ |
339 | struct { | 339 | struct { |
340 | __le32 fileset; /* 4: fileset number */ | 340 | __le32 fileset; /* 4: fileset number */ |
341 | __le32 inode; /* 4: inode number */ | 341 | __le32 inode; /* 4: inode number */ |
342 | __le32 type; /* 4: NEWPAGE record type */ | 342 | __le32 type; /* 4: NEWPAGE record type */ |
343 | pxd_t pxd; /* 8: on-disk page pxd */ | 343 | pxd_t pxd; /* 8: on-disk page pxd */ |
344 | } newpage; | 344 | } newpage; |
345 | 345 | ||
346 | /* | 346 | /* |
347 | * ? DUMMY: filler | 347 | * ? DUMMY: filler |
348 | * | 348 | * |
349 | * no type-dependent information | 349 | * no type-dependent information |
350 | */ | 350 | */ |
351 | } log; | 351 | } log; |
352 | }; /* (36) */ | 352 | }; /* (36) */ |
353 | 353 | ||
354 | #define LOGRDSIZE (sizeof(struct lrd)) | 354 | #define LOGRDSIZE (sizeof(struct lrd)) |
355 | 355 | ||
356 | /* | 356 | /* |
357 | * line vector descriptor | 357 | * line vector descriptor |
358 | */ | 358 | */ |
359 | struct lvd { | 359 | struct lvd { |
360 | __le16 offset; | 360 | __le16 offset; |
361 | __le16 length; | 361 | __le16 length; |
362 | }; | 362 | }; |
363 | 363 | ||
364 | 364 | ||
365 | /* | 365 | /* |
366 | * log logical volume | 366 | * log logical volume |
367 | */ | 367 | */ |
368 | struct jfs_log { | 368 | struct jfs_log { |
369 | 369 | ||
370 | 	struct list_head sb_list; /* This is used to sync metadata | 370 | 	struct list_head sb_list; /* This is used to sync metadata |
371 | * before writing syncpt. | 371 | * before writing syncpt. |
372 | */ | 372 | */ |
373 | struct list_head journal_list; /* Global list */ | 373 | struct list_head journal_list; /* Global list */ |
374 | struct block_device *bdev; /* 4: log lv pointer */ | 374 | struct block_device *bdev; /* 4: log lv pointer */ |
375 | int serial; /* 4: log mount serial number */ | 375 | int serial; /* 4: log mount serial number */ |
376 | 376 | ||
377 | 	s64 base;	/* @8: log extent address (inline log) */ | 377 | 	s64 base;	/* @8: log extent address (inline log) */ |
378 | int size; /* 4: log size in log page (in page) */ | 378 | int size; /* 4: log size in log page (in page) */ |
379 | int l2bsize; /* 4: log2 of bsize */ | 379 | int l2bsize; /* 4: log2 of bsize */ |
380 | 380 | ||
381 | long flag; /* 4: flag */ | 381 | long flag; /* 4: flag */ |
382 | 382 | ||
383 | struct lbuf *lbuf_free; /* 4: free lbufs */ | 383 | struct lbuf *lbuf_free; /* 4: free lbufs */ |
384 | wait_queue_head_t free_wait; /* 4: */ | 384 | wait_queue_head_t free_wait; /* 4: */ |
385 | 385 | ||
386 | /* log write */ | 386 | /* log write */ |
387 | int logtid; /* 4: log tid */ | 387 | int logtid; /* 4: log tid */ |
388 | int page; /* 4: page number of eol page */ | 388 | int page; /* 4: page number of eol page */ |
389 | int eor; /* 4: eor of last record in eol page */ | 389 | int eor; /* 4: eor of last record in eol page */ |
390 | struct lbuf *bp; /* 4: current log page buffer */ | 390 | struct lbuf *bp; /* 4: current log page buffer */ |
391 | 391 | ||
392 | struct semaphore loglock; /* 4: log write serialization lock */ | 392 | struct semaphore loglock; /* 4: log write serialization lock */ |
393 | 393 | ||
394 | /* syncpt */ | 394 | /* syncpt */ |
395 | int nextsync; /* 4: bytes to write before next syncpt */ | 395 | int nextsync; /* 4: bytes to write before next syncpt */ |
396 | int active; /* 4: */ | 396 | int active; /* 4: */ |
397 | wait_queue_head_t syncwait; /* 4: */ | 397 | wait_queue_head_t syncwait; /* 4: */ |
398 | 398 | ||
399 | /* commit */ | 399 | /* commit */ |
400 | uint cflag; /* 4: */ | 400 | uint cflag; /* 4: */ |
401 | struct list_head cqueue; /* FIFO commit queue */ | 401 | struct list_head cqueue; /* FIFO commit queue */ |
402 | struct tblock *flush_tblk; /* tblk we're waiting on for flush */ | 402 | struct tblock *flush_tblk; /* tblk we're waiting on for flush */ |
403 | int gcrtc; /* 4: GC_READY transaction count */ | 403 | int gcrtc; /* 4: GC_READY transaction count */ |
404 | struct tblock *gclrt; /* 4: latest GC_READY transaction */ | 404 | struct tblock *gclrt; /* 4: latest GC_READY transaction */ |
405 | spinlock_t gclock; /* 4: group commit lock */ | 405 | spinlock_t gclock; /* 4: group commit lock */ |
406 | int logsize; /* 4: log data area size in byte */ | 406 | int logsize; /* 4: log data area size in byte */ |
407 | int lsn; /* 4: end-of-log */ | 407 | int lsn; /* 4: end-of-log */ |
408 | int clsn; /* 4: clsn */ | 408 | int clsn; /* 4: clsn */ |
409 | int syncpt; /* 4: addr of last syncpt record */ | 409 | int syncpt; /* 4: addr of last syncpt record */ |
410 | int sync; /* 4: addr from last logsync() */ | 410 | int sync; /* 4: addr from last logsync() */ |
411 | struct list_head synclist; /* 8: logsynclist anchor */ | 411 | struct list_head synclist; /* 8: logsynclist anchor */ |
412 | spinlock_t synclock; /* 4: synclist lock */ | 412 | spinlock_t synclock; /* 4: synclist lock */ |
413 | struct lbuf *wqueue; /* 4: log pageout queue */ | 413 | struct lbuf *wqueue; /* 4: log pageout queue */ |
414 | int count; /* 4: count */ | 414 | int count; /* 4: count */ |
415 | char uuid[16]; /* 16: 128-bit uuid of log device */ | 415 | char uuid[16]; /* 16: 128-bit uuid of log device */ |
416 | 416 | ||
417 | 	int no_integrity;	/* 4: flag to disable journaling to disk */ | 417 | 	int no_integrity;	/* 4: flag to disable journaling to disk */ |
418 | }; | 418 | }; |
419 | 419 | ||
420 | /* | 420 | /* |
421 | * Log flag | 421 | * Log flag |
422 | */ | 422 | */ |
423 | #define log_INLINELOG 1 | 423 | #define log_INLINELOG 1 |
424 | #define log_SYNCBARRIER 2 | 424 | #define log_SYNCBARRIER 2 |
425 | #define log_QUIESCE 3 | 425 | #define log_QUIESCE 3 |
426 | #define log_FLUSH 4 | 426 | #define log_FLUSH 4 |
427 | 427 | ||
428 | /* | 428 | /* |
429 | * group commit flag | 429 | * group commit flag |
430 | */ | 430 | */ |
431 | /* jfs_log */ | 431 | /* jfs_log */ |
432 | #define logGC_PAGEOUT 0x00000001 | 432 | #define logGC_PAGEOUT 0x00000001 |
433 | 433 | ||
434 | /* tblock/lbuf */ | 434 | /* tblock/lbuf */ |
435 | #define tblkGC_QUEUE 0x0001 | 435 | #define tblkGC_QUEUE 0x0001 |
436 | #define tblkGC_READY 0x0002 | 436 | #define tblkGC_READY 0x0002 |
437 | #define tblkGC_COMMIT 0x0004 | 437 | #define tblkGC_COMMIT 0x0004 |
438 | #define tblkGC_COMMITTED 0x0008 | 438 | #define tblkGC_COMMITTED 0x0008 |
439 | #define tblkGC_EOP 0x0010 | 439 | #define tblkGC_EOP 0x0010 |
440 | #define tblkGC_FREE 0x0020 | 440 | #define tblkGC_FREE 0x0020 |
441 | #define tblkGC_LEADER 0x0040 | 441 | #define tblkGC_LEADER 0x0040 |
442 | #define tblkGC_ERROR 0x0080 | 442 | #define tblkGC_ERROR 0x0080 |
443 | #define tblkGC_LAZY 0x0100 // D230860 | 443 | #define tblkGC_LAZY 0x0100 // D230860 |
444 | #define tblkGC_UNLOCKED 0x0200 // D230860 | 444 | #define tblkGC_UNLOCKED 0x0200 // D230860 |
445 | 445 | ||
446 | /* | 446 | /* |
447 | * log cache buffer header | 447 | * log cache buffer header |
448 | */ | 448 | */ |
449 | struct lbuf { | 449 | struct lbuf { |
450 | struct jfs_log *l_log; /* 4: log associated with buffer */ | 450 | struct jfs_log *l_log; /* 4: log associated with buffer */ |
451 | 451 | ||
452 | /* | 452 | /* |
453 | * data buffer base area | 453 | * data buffer base area |
454 | */ | 454 | */ |
455 | uint l_flag; /* 4: pageout control flags */ | 455 | uint l_flag; /* 4: pageout control flags */ |
456 | 456 | ||
457 | struct lbuf *l_wqnext; /* 4: write queue link */ | 457 | struct lbuf *l_wqnext; /* 4: write queue link */ |
458 | 	struct lbuf *l_freelist;	/* 4: free list link */ | 458 | 	struct lbuf *l_freelist;	/* 4: free list link */ |
459 | 459 | ||
460 | int l_pn; /* 4: log page number */ | 460 | int l_pn; /* 4: log page number */ |
461 | int l_eor; /* 4: log record eor */ | 461 | int l_eor; /* 4: log record eor */ |
462 | int l_ceor; /* 4: committed log record eor */ | 462 | int l_ceor; /* 4: committed log record eor */ |
463 | 463 | ||
464 | s64 l_blkno; /* 8: log page block number */ | 464 | s64 l_blkno; /* 8: log page block number */ |
465 | caddr_t l_ldata; /* 4: data page */ | 465 | caddr_t l_ldata; /* 4: data page */ |
466 | struct page *l_page; /* The page itself */ | 466 | struct page *l_page; /* The page itself */ |
467 | uint l_offset; /* Offset of l_ldata within the page */ | 467 | uint l_offset; /* Offset of l_ldata within the page */ |
468 | 468 | ||
469 | wait_queue_head_t l_ioevent; /* 4: i/o done event */ | 469 | wait_queue_head_t l_ioevent; /* 4: i/o done event */ |
470 | }; | 470 | }; |
471 | 471 | ||
472 | /* Reuse l_freelist for redrive list */ | 472 | /* Reuse l_freelist for redrive list */ |
473 | #define l_redrive_next l_freelist | 473 | #define l_redrive_next l_freelist |
474 | 474 | ||
475 | /* | 475 | /* |
476 | * logsynclist block | 476 | * logsynclist block |
477 | * | 477 | * |
478 | * common logsyncblk prefix for jbuf_t and tblock | 478 | * common logsyncblk prefix for jbuf_t and tblock |
479 | */ | 479 | */ |
480 | struct logsyncblk { | 480 | struct logsyncblk { |
481 | u16 xflag; /* flags */ | 481 | u16 xflag; /* flags */ |
482 | 	u16 flag;	/* only meaningful in tblock */ | 482 | 	u16 flag;	/* only meaningful in tblock */ |
483 | lid_t lid; /* lock id */ | 483 | lid_t lid; /* lock id */ |
484 | s32 lsn; /* log sequence number */ | 484 | s32 lsn; /* log sequence number */ |
485 | struct list_head synclist; /* log sync list link */ | 485 | struct list_head synclist; /* log sync list link */ |
486 | }; | 486 | }; |
487 | 487 | ||
488 | /* | 488 | /* |
489 | * logsynclist serialization (per log) | 489 | * logsynclist serialization (per log) |
490 | */ | 490 | */ |
491 | 491 | ||
492 | #define LOGSYNC_LOCK_INIT(log) spin_lock_init(&(log)->synclock) | 492 | #define LOGSYNC_LOCK_INIT(log) spin_lock_init(&(log)->synclock) |
493 | #define LOGSYNC_LOCK(log, flags) spin_lock_irqsave(&(log)->synclock, flags) | 493 | #define LOGSYNC_LOCK(log, flags) spin_lock_irqsave(&(log)->synclock, flags) |
494 | #define LOGSYNC_UNLOCK(log, flags) \ | 494 | #define LOGSYNC_UNLOCK(log, flags) \ |
495 | spin_unlock_irqrestore(&(log)->synclock, flags) | 495 | spin_unlock_irqrestore(&(log)->synclock, flags) |
496 | 496 | ||
497 | /* compute the difference in bytes of lsn from sync point */ | 497 | /* compute the difference in bytes of lsn from sync point */ |
498 | #define logdiff(diff, lsn, log)\ | 498 | #define logdiff(diff, lsn, log)\ |
499 | {\ | 499 | {\ |
500 | diff = (lsn) - (log)->syncpt;\ | 500 | diff = (lsn) - (log)->syncpt;\ |
501 | if (diff < 0)\ | 501 | if (diff < 0)\ |
502 | diff += (log)->logsize;\ | 502 | diff += (log)->logsize;\ |
503 | } | 503 | } |
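A worked example of the wraparound handled by logdiff (values are illustrative): with log->logsize = 0x400000 and log->syncpt = 0x3FF000, a wrapped lsn of 0x1000 gives 0x1000 - 0x3FF000 = -0x3FE000, and adding logsize yields 0x2000 bytes of log written since the sync point.

	int diff;
	logdiff(diff, 0x1000, log);	/* diff == 0x2000 under the values above */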
504 | 504 | ||
505 | extern int lmLogOpen(struct super_block *sb); | 505 | extern int lmLogOpen(struct super_block *sb); |
506 | extern int lmLogClose(struct super_block *sb); | 506 | extern int lmLogClose(struct super_block *sb); |
507 | extern int lmLogShutdown(struct jfs_log * log); | 507 | extern int lmLogShutdown(struct jfs_log * log); |
508 | extern int lmLogInit(struct jfs_log * log); | 508 | extern int lmLogInit(struct jfs_log * log); |
509 | extern int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize); | 509 | extern int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize); |
510 | extern int lmGroupCommit(struct jfs_log *, struct tblock *); | 510 | extern int lmGroupCommit(struct jfs_log *, struct tblock *); |
511 | extern int jfsIOWait(void *); | 511 | extern int jfsIOWait(void *); |
512 | extern void jfs_flush_journal(struct jfs_log * log, int wait); | 512 | extern void jfs_flush_journal(struct jfs_log * log, int wait); |
513 | extern void jfs_syncpt(struct jfs_log *log); | 513 | extern void jfs_syncpt(struct jfs_log *log, int hard_sync); |
514 | 514 | ||
515 | #endif /* _H_JFS_LOGMGR */ | 515 | #endif /* _H_JFS_LOGMGR */ |
516 | 516 |
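Note on the prototype change above: jfs_syncpt() gains a hard_sync argument. As the txEnd() hunk in jfs_txnmgr.c below shows, the sync-barrier path now calls

	jfs_syncpt(log, 1);	/* write dirty metadata & forward log syncpt */

while log_SYNCBARRIER is still set, i.e. while txBegin()/txBeginAnon() are still sleeping on log->syncwait.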
fs/jfs/jfs_txnmgr.c
1 | /* | 1 | /* |
2 | * Copyright (C) International Business Machines Corp., 2000-2005 | 2 | * Copyright (C) International Business Machines Corp., 2000-2005 |
3 | * Portions Copyright (C) Christoph Hellwig, 2001-2002 | 3 | * Portions Copyright (C) Christoph Hellwig, 2001-2002 |
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by | 6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or | 7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. | 8 | * (at your option) any later version. |
9 | * | 9 | * |
10 | * This program is distributed in the hope that it will be useful, | 10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
13 | * the GNU General Public License for more details. | 13 | * the GNU General Public License for more details. |
14 | * | 14 | * |
15 | * You should have received a copy of the GNU General Public License | 15 | * You should have received a copy of the GNU General Public License |
16 | * along with this program; if not, write to the Free Software | 16 | * along with this program; if not, write to the Free Software |
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
18 | */ | 18 | */ |
19 | 19 | ||
20 | /* | 20 | /* |
21 | * jfs_txnmgr.c: transaction manager | 21 | * jfs_txnmgr.c: transaction manager |
22 | * | 22 | * |
23 | * notes: | 23 | * notes: |
24 | * transaction starts with txBegin() and ends with txCommit() | 24 | * transaction starts with txBegin() and ends with txCommit() |
25 | * or txAbort(). | 25 | * or txAbort(). |
26 | * | 26 | * |
27 | * tlock is acquired at the time of update; | 27 | * tlock is acquired at the time of update; |
28 | * (obviate scan at commit time for xtree and dtree) | 28 | * (obviate scan at commit time for xtree and dtree) |
29 | * tlock and mp point to each other; | 29 | * tlock and mp point to each other; |
30 | * (no hashlist for mp -> tlock). | 30 | * (no hashlist for mp -> tlock). |
31 | * | 31 | * |
32 | * special cases: | 32 | * special cases: |
33 | * tlock on in-memory inode: | 33 | * tlock on in-memory inode: |
34 | * in-place tlock in the in-memory inode itself; | 34 | * in-place tlock in the in-memory inode itself; |
35 | * converted to page lock by iWrite() at commit time. | 35 | * converted to page lock by iWrite() at commit time. |
36 | * | 36 | * |
37 | * tlock during write()/mmap() under anonymous transaction (tid = 0): | 37 | * tlock during write()/mmap() under anonymous transaction (tid = 0): |
38 | * transferred (?) to transaction at commit time. | 38 | * transferred (?) to transaction at commit time. |
39 | * | 39 | * |
40 | * use the page itself to update allocation maps | 40 | * use the page itself to update allocation maps |
41 | * (obviate intermediate replication of allocation/deallocation data) | 41 | * (obviate intermediate replication of allocation/deallocation data) |
42 | * hold on to mp+lock thru update of maps | 42 | * hold on to mp+lock thru update of maps |
43 | */ | 43 | */ |
44 | 44 | ||
45 | #include <linux/fs.h> | 45 | #include <linux/fs.h> |
46 | #include <linux/vmalloc.h> | 46 | #include <linux/vmalloc.h> |
47 | #include <linux/smp_lock.h> | 47 | #include <linux/smp_lock.h> |
48 | #include <linux/completion.h> | 48 | #include <linux/completion.h> |
49 | #include <linux/suspend.h> | 49 | #include <linux/suspend.h> |
50 | #include <linux/module.h> | 50 | #include <linux/module.h> |
51 | #include <linux/moduleparam.h> | 51 | #include <linux/moduleparam.h> |
52 | #include "jfs_incore.h" | 52 | #include "jfs_incore.h" |
53 | #include "jfs_inode.h" | 53 | #include "jfs_inode.h" |
54 | #include "jfs_filsys.h" | 54 | #include "jfs_filsys.h" |
55 | #include "jfs_metapage.h" | 55 | #include "jfs_metapage.h" |
56 | #include "jfs_dinode.h" | 56 | #include "jfs_dinode.h" |
57 | #include "jfs_imap.h" | 57 | #include "jfs_imap.h" |
58 | #include "jfs_dmap.h" | 58 | #include "jfs_dmap.h" |
59 | #include "jfs_superblock.h" | 59 | #include "jfs_superblock.h" |
60 | #include "jfs_debug.h" | 60 | #include "jfs_debug.h" |
61 | 61 | ||
62 | /* | 62 | /* |
63 | * transaction management structures | 63 | * transaction management structures |
64 | */ | 64 | */ |
65 | static struct { | 65 | static struct { |
66 | int freetid; /* index of a free tid structure */ | 66 | int freetid; /* index of a free tid structure */ |
67 | int freelock; /* index first free lock word */ | 67 | int freelock; /* index first free lock word */ |
68 | wait_queue_head_t freewait; /* eventlist of free tblock */ | 68 | wait_queue_head_t freewait; /* eventlist of free tblock */ |
69 | wait_queue_head_t freelockwait; /* eventlist of free tlock */ | 69 | wait_queue_head_t freelockwait; /* eventlist of free tlock */ |
70 | wait_queue_head_t lowlockwait; /* eventlist of ample tlocks */ | 70 | wait_queue_head_t lowlockwait; /* eventlist of ample tlocks */ |
71 | int tlocksInUse; /* Number of tlocks in use */ | 71 | int tlocksInUse; /* Number of tlocks in use */ |
72 | spinlock_t LazyLock; /* synchronize sync_queue & unlock_queue */ | 72 | spinlock_t LazyLock; /* synchronize sync_queue & unlock_queue */ |
73 | /* struct tblock *sync_queue; * Transactions waiting for data sync */ | 73 | /* struct tblock *sync_queue; * Transactions waiting for data sync */ |
74 | struct list_head unlock_queue; /* Txns waiting to be released */ | 74 | struct list_head unlock_queue; /* Txns waiting to be released */ |
75 | struct list_head anon_list; /* inodes having anonymous txns */ | 75 | struct list_head anon_list; /* inodes having anonymous txns */ |
76 | struct list_head anon_list2; /* inodes having anonymous txns | 76 | struct list_head anon_list2; /* inodes having anonymous txns |
77 | that couldn't be sync'ed */ | 77 | that couldn't be sync'ed */ |
78 | } TxAnchor; | 78 | } TxAnchor; |
79 | 79 | ||
80 | int jfs_tlocks_low; /* Indicates low number of available tlocks */ | 80 | int jfs_tlocks_low; /* Indicates low number of available tlocks */ |
81 | 81 | ||
82 | #ifdef CONFIG_JFS_STATISTICS | 82 | #ifdef CONFIG_JFS_STATISTICS |
83 | static struct { | 83 | static struct { |
84 | uint txBegin; | 84 | uint txBegin; |
85 | uint txBegin_barrier; | 85 | uint txBegin_barrier; |
86 | uint txBegin_lockslow; | 86 | uint txBegin_lockslow; |
87 | uint txBegin_freetid; | 87 | uint txBegin_freetid; |
88 | uint txBeginAnon; | 88 | uint txBeginAnon; |
89 | uint txBeginAnon_barrier; | 89 | uint txBeginAnon_barrier; |
90 | uint txBeginAnon_lockslow; | 90 | uint txBeginAnon_lockslow; |
91 | uint txLockAlloc; | 91 | uint txLockAlloc; |
92 | uint txLockAlloc_freelock; | 92 | uint txLockAlloc_freelock; |
93 | } TxStat; | 93 | } TxStat; |
94 | #endif | 94 | #endif |
95 | 95 | ||
96 | static int nTxBlock = -1; /* number of transaction blocks */ | 96 | static int nTxBlock = -1; /* number of transaction blocks */ |
97 | module_param(nTxBlock, int, 0); | 97 | module_param(nTxBlock, int, 0); |
98 | MODULE_PARM_DESC(nTxBlock, | 98 | MODULE_PARM_DESC(nTxBlock, |
99 | "Number of transaction blocks (max:65536)"); | 99 | "Number of transaction blocks (max:65536)"); |
100 | 100 | ||
101 | static int nTxLock = -1; /* number of transaction locks */ | 101 | static int nTxLock = -1; /* number of transaction locks */ |
102 | module_param(nTxLock, int, 0); | 102 | module_param(nTxLock, int, 0); |
103 | MODULE_PARM_DESC(nTxLock, | 103 | MODULE_PARM_DESC(nTxLock, |
104 | "Number of transaction locks (max:65536)"); | 104 | "Number of transaction locks (max:65536)"); |
105 | 105 | ||
106 | struct tblock *TxBlock; /* transaction block table */ | 106 | struct tblock *TxBlock; /* transaction block table */ |
107 | static int TxLockLWM; /* Low water mark for number of txLocks used */ | 107 | static int TxLockLWM; /* Low water mark for number of txLocks used */ |
108 | static int TxLockHWM; /* High water mark for number of txLocks used */ | 108 | static int TxLockHWM; /* High water mark for number of txLocks used */ |
109 | static int TxLockVHWM; /* Very High water mark */ | 109 | static int TxLockVHWM; /* Very High water mark */ |
110 | struct tlock *TxLock; /* transaction lock table */ | 110 | struct tlock *TxLock; /* transaction lock table */ |
111 | 111 | ||
112 | /* | 112 | /* |
113 | * transaction management lock | 113 | * transaction management lock |
114 | */ | 114 | */ |
115 | static DEFINE_SPINLOCK(jfsTxnLock); | 115 | static DEFINE_SPINLOCK(jfsTxnLock); |
116 | 116 | ||
117 | #define TXN_LOCK() spin_lock(&jfsTxnLock) | 117 | #define TXN_LOCK() spin_lock(&jfsTxnLock) |
118 | #define TXN_UNLOCK() spin_unlock(&jfsTxnLock) | 118 | #define TXN_UNLOCK() spin_unlock(&jfsTxnLock) |
119 | 119 | ||
120 | #define LAZY_LOCK_INIT() spin_lock_init(&TxAnchor.LazyLock); | 120 | #define LAZY_LOCK_INIT() spin_lock_init(&TxAnchor.LazyLock); |
121 | #define LAZY_LOCK(flags) spin_lock_irqsave(&TxAnchor.LazyLock, flags) | 121 | #define LAZY_LOCK(flags) spin_lock_irqsave(&TxAnchor.LazyLock, flags) |
122 | #define LAZY_UNLOCK(flags) spin_unlock_irqrestore(&TxAnchor.LazyLock, flags) | 122 | #define LAZY_UNLOCK(flags) spin_unlock_irqrestore(&TxAnchor.LazyLock, flags) |
123 | 123 | ||
124 | DECLARE_WAIT_QUEUE_HEAD(jfs_sync_thread_wait); | 124 | DECLARE_WAIT_QUEUE_HEAD(jfs_sync_thread_wait); |
125 | DECLARE_WAIT_QUEUE_HEAD(jfs_commit_thread_wait); | 125 | DECLARE_WAIT_QUEUE_HEAD(jfs_commit_thread_wait); |
126 | static int jfs_commit_thread_waking; | 126 | static int jfs_commit_thread_waking; |
127 | 127 | ||
128 | /* | 128 | /* |
129 | * Retry logic exists outside these macros to protect against spurious wakeups. | 129 | * Retry logic exists outside these macros to protect against spurious wakeups. |
130 | */ | 130 | */ |
131 | static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event) | 131 | static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event) |
132 | { | 132 | { |
133 | DECLARE_WAITQUEUE(wait, current); | 133 | DECLARE_WAITQUEUE(wait, current); |
134 | 134 | ||
135 | add_wait_queue(event, &wait); | 135 | add_wait_queue(event, &wait); |
136 | set_current_state(TASK_UNINTERRUPTIBLE); | 136 | set_current_state(TASK_UNINTERRUPTIBLE); |
137 | TXN_UNLOCK(); | 137 | TXN_UNLOCK(); |
138 | schedule(); | 138 | schedule(); |
139 | current->state = TASK_RUNNING; | 139 | current->state = TASK_RUNNING; |
140 | remove_wait_queue(event, &wait); | 140 | remove_wait_queue(event, &wait); |
141 | } | 141 | } |
142 | 142 | ||
143 | #define TXN_SLEEP(event)\ | 143 | #define TXN_SLEEP(event)\ |
144 | {\ | 144 | {\ |
145 | TXN_SLEEP_DROP_LOCK(event);\ | 145 | TXN_SLEEP_DROP_LOCK(event);\ |
146 | TXN_LOCK();\ | 146 | TXN_LOCK();\ |
147 | } | 147 | } |
148 | 148 | ||
149 | #define TXN_WAKEUP(event) wake_up_all(event) | 149 | #define TXN_WAKEUP(event) wake_up_all(event) |
150 | 150 | ||
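The caller-side idiom these macros imply (a condensed sketch of what txBegin() and txBeginAnon() below actually do; the condition and event names are placeholders) is to re-test after every wakeup, since TXN_WAKEUP() wakes all sleepers:

	TXN_LOCK();
	retry:
	if (!resource_available) {		/* placeholder condition */
		TXN_SLEEP(&some_event);		/* drops, then retakes TXN_LOCK */
		goto retry;			/* wakeup may be spurious */
	}
	TXN_UNLOCK();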
151 | /* | 151 | /* |
152 | * statistics | 152 | * statistics |
153 | */ | 153 | */ |
154 | static struct { | 154 | static struct { |
155 | tid_t maxtid; /* 4: biggest tid ever used */ | 155 | tid_t maxtid; /* 4: biggest tid ever used */ |
156 | lid_t maxlid; /* 4: biggest lid ever used */ | 156 | lid_t maxlid; /* 4: biggest lid ever used */ |
157 | int ntid; /* 4: # of transactions performed */ | 157 | int ntid; /* 4: # of transactions performed */ |
158 | int nlid; /* 4: # of tlocks acquired */ | 158 | int nlid; /* 4: # of tlocks acquired */ |
159 | int waitlock; /* 4: # of tlock wait */ | 159 | int waitlock; /* 4: # of tlock wait */ |
160 | } stattx; | 160 | } stattx; |
161 | 161 | ||
162 | /* | 162 | /* |
163 | * forward references | 163 | * forward references |
164 | */ | 164 | */ |
165 | static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | 165 | static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, |
166 | struct tlock * tlck, struct commit * cd); | 166 | struct tlock * tlck, struct commit * cd); |
167 | static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | 167 | static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, |
168 | struct tlock * tlck); | 168 | struct tlock * tlck); |
169 | static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | 169 | static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, |
170 | struct tlock * tlck); | 170 | struct tlock * tlck); |
171 | static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | 171 | static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, |
172 | struct tlock * tlck); | 172 | struct tlock * tlck); |
173 | static void txAllocPMap(struct inode *ip, struct maplock * maplock, | 173 | static void txAllocPMap(struct inode *ip, struct maplock * maplock, |
174 | struct tblock * tblk); | 174 | struct tblock * tblk); |
175 | static void txForce(struct tblock * tblk); | 175 | static void txForce(struct tblock * tblk); |
176 | static int txLog(struct jfs_log * log, struct tblock * tblk, | 176 | static int txLog(struct jfs_log * log, struct tblock * tblk, |
177 | struct commit * cd); | 177 | struct commit * cd); |
178 | static void txUpdateMap(struct tblock * tblk); | 178 | static void txUpdateMap(struct tblock * tblk); |
179 | static void txRelease(struct tblock * tblk); | 179 | static void txRelease(struct tblock * tblk); |
180 | static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | 180 | static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, |
181 | struct tlock * tlck); | 181 | struct tlock * tlck); |
182 | static void LogSyncRelease(struct metapage * mp); | 182 | static void LogSyncRelease(struct metapage * mp); |
183 | 183 | ||
184 | /* | 184 | /* |
185 | * transaction block/lock management | 185 | * transaction block/lock management |
186 | * --------------------------------- | 186 | * --------------------------------- |
187 | */ | 187 | */ |
188 | 188 | ||
189 | /* | 189 | /* |
190 | * Get a transaction lock from the free list. If the number in use is | 190 | * Get a transaction lock from the free list. If the number in use is |
191 | * greater than the high water mark, wake up the sync daemon. This should | 191 | * greater than the high water mark, wake up the sync daemon. This should |
192 | * free some anonymous transaction locks. (TXN_LOCK must be held.) | 192 | * free some anonymous transaction locks. (TXN_LOCK must be held.) |
193 | */ | 193 | */ |
194 | static lid_t txLockAlloc(void) | 194 | static lid_t txLockAlloc(void) |
195 | { | 195 | { |
196 | lid_t lid; | 196 | lid_t lid; |
197 | 197 | ||
198 | INCREMENT(TxStat.txLockAlloc); | 198 | INCREMENT(TxStat.txLockAlloc); |
199 | if (!TxAnchor.freelock) { | 199 | if (!TxAnchor.freelock) { |
200 | INCREMENT(TxStat.txLockAlloc_freelock); | 200 | INCREMENT(TxStat.txLockAlloc_freelock); |
201 | } | 201 | } |
202 | 202 | ||
203 | while (!(lid = TxAnchor.freelock)) | 203 | while (!(lid = TxAnchor.freelock)) |
204 | TXN_SLEEP(&TxAnchor.freelockwait); | 204 | TXN_SLEEP(&TxAnchor.freelockwait); |
205 | TxAnchor.freelock = TxLock[lid].next; | 205 | TxAnchor.freelock = TxLock[lid].next; |
206 | HIGHWATERMARK(stattx.maxlid, lid); | 206 | HIGHWATERMARK(stattx.maxlid, lid); |
207 | if ((++TxAnchor.tlocksInUse > TxLockHWM) && (jfs_tlocks_low == 0)) { | 207 | if ((++TxAnchor.tlocksInUse > TxLockHWM) && (jfs_tlocks_low == 0)) { |
208 | jfs_info("txLockAlloc tlocks low"); | 208 | jfs_info("txLockAlloc tlocks low"); |
209 | jfs_tlocks_low = 1; | 209 | jfs_tlocks_low = 1; |
210 | wake_up(&jfs_sync_thread_wait); | 210 | wake_up(&jfs_sync_thread_wait); |
211 | } | 211 | } |
212 | 212 | ||
213 | return lid; | 213 | return lid; |
214 | } | 214 | } |
215 | 215 | ||
216 | static void txLockFree(lid_t lid) | 216 | static void txLockFree(lid_t lid) |
217 | { | 217 | { |
218 | TxLock[lid].tid = 0; | 218 | TxLock[lid].tid = 0; |
219 | TxLock[lid].next = TxAnchor.freelock; | 219 | TxLock[lid].next = TxAnchor.freelock; |
220 | TxAnchor.freelock = lid; | 220 | TxAnchor.freelock = lid; |
221 | TxAnchor.tlocksInUse--; | 221 | TxAnchor.tlocksInUse--; |
222 | if (jfs_tlocks_low && (TxAnchor.tlocksInUse < TxLockLWM)) { | 222 | if (jfs_tlocks_low && (TxAnchor.tlocksInUse < TxLockLWM)) { |
223 | jfs_info("txLockFree jfs_tlocks_low no more"); | 223 | jfs_info("txLockFree jfs_tlocks_low no more"); |
224 | jfs_tlocks_low = 0; | 224 | jfs_tlocks_low = 0; |
225 | TXN_WAKEUP(&TxAnchor.lowlockwait); | 225 | TXN_WAKEUP(&TxAnchor.lowlockwait); |
226 | } | 226 | } |
227 | TXN_WAKEUP(&TxAnchor.freelockwait); | 227 | TXN_WAKEUP(&TxAnchor.freelockwait); |
228 | } | 228 | } |
229 | 229 | ||
230 | /* | 230 | /* |
231 | * NAME: txInit() | 231 | * NAME: txInit() |
232 | * | 232 | * |
233 | * FUNCTION: initialize transaction management structures | 233 | * FUNCTION: initialize transaction management structures |
234 | * | 234 | * |
235 | * RETURN: 0 on success, -ENOMEM if table allocation fails | 235 | * RETURN: 0 on success, -ENOMEM if table allocation fails |
236 | * | 236 | * |
237 | * serialization: single thread at jfs_init() | 237 | * serialization: single thread at jfs_init() |
238 | */ | 238 | */ |
239 | int txInit(void) | 239 | int txInit(void) |
240 | { | 240 | { |
241 | int k, size; | 241 | int k, size; |
242 | struct sysinfo si; | 242 | struct sysinfo si; |
243 | 243 | ||
244 | /* Set defaults for nTxLock and nTxBlock if unset */ | 244 | /* Set defaults for nTxLock and nTxBlock if unset */ |
245 | 245 | ||
246 | if (nTxLock == -1) { | 246 | if (nTxLock == -1) { |
247 | if (nTxBlock == -1) { | 247 | if (nTxBlock == -1) { |
248 | /* Base default on memory size */ | 248 | /* Base default on memory size */ |
249 | si_meminfo(&si); | 249 | si_meminfo(&si); |
250 | if (si.totalram > (256 * 1024)) /* 1 GB */ | 250 | if (si.totalram > (256 * 1024)) /* 1 GB */ |
251 | nTxLock = 64 * 1024; | 251 | nTxLock = 64 * 1024; |
252 | else | 252 | else |
253 | nTxLock = si.totalram >> 2; | 253 | nTxLock = si.totalram >> 2; |
254 | } else if (nTxBlock > (8 * 1024)) | 254 | } else if (nTxBlock > (8 * 1024)) |
255 | nTxLock = 64 * 1024; | 255 | nTxLock = 64 * 1024; |
256 | else | 256 | else |
257 | nTxLock = nTxBlock << 3; | 257 | nTxLock = nTxBlock << 3; |
258 | } | 258 | } |
259 | if (nTxBlock == -1) | 259 | if (nTxBlock == -1) |
260 | nTxBlock = nTxLock >> 3; | 260 | nTxBlock = nTxLock >> 3; |
261 | 261 | ||
262 | /* Verify tunable parameters */ | 262 | /* Verify tunable parameters */ |
263 | if (nTxBlock < 16) | 263 | if (nTxBlock < 16) |
264 | nTxBlock = 16; /* No one should set it this low */ | 264 | nTxBlock = 16; /* No one should set it this low */ |
265 | if (nTxBlock > 65536) | 265 | if (nTxBlock > 65536) |
266 | nTxBlock = 65536; | 266 | nTxBlock = 65536; |
267 | if (nTxLock < 256) | 267 | if (nTxLock < 256) |
268 | nTxLock = 256; /* No one should set it this low */ | 268 | nTxLock = 256; /* No one should set it this low */ |
269 | if (nTxLock > 65536) | 269 | if (nTxLock > 65536) |
270 | nTxLock = 65536; | 270 | nTxLock = 65536; |
271 | 271 | ||
272 | printk(KERN_INFO "JFS: nTxBlock = %d, nTxLock = %d\n", | 272 | printk(KERN_INFO "JFS: nTxBlock = %d, nTxLock = %d\n", |
273 | nTxBlock, nTxLock); | 273 | nTxBlock, nTxLock); |
274 | /* | 274 | /* |
275 | * initialize transaction block (tblock) table | 275 | * initialize transaction block (tblock) table |
276 | * | 276 | * |
277 | * transaction id (tid) = tblock index | 277 | * transaction id (tid) = tblock index |
278 | * tid = 0 is reserved. | 278 | * tid = 0 is reserved. |
279 | */ | 279 | */ |
280 | TxLockLWM = (nTxLock * 4) / 10; | 280 | TxLockLWM = (nTxLock * 4) / 10; |
281 | TxLockHWM = (nTxLock * 7) / 10; | 281 | TxLockHWM = (nTxLock * 7) / 10; |
282 | TxLockVHWM = (nTxLock * 8) / 10; | 282 | TxLockVHWM = (nTxLock * 8) / 10; |
283 | 283 | ||
284 | size = sizeof(struct tblock) * nTxBlock; | 284 | size = sizeof(struct tblock) * nTxBlock; |
285 | TxBlock = (struct tblock *) vmalloc(size); | 285 | TxBlock = (struct tblock *) vmalloc(size); |
286 | if (TxBlock == NULL) | 286 | if (TxBlock == NULL) |
287 | return -ENOMEM; | 287 | return -ENOMEM; |
288 | 288 | ||
289 | for (k = 1; k < nTxBlock - 1; k++) { | 289 | for (k = 1; k < nTxBlock - 1; k++) { |
290 | TxBlock[k].next = k + 1; | 290 | TxBlock[k].next = k + 1; |
291 | init_waitqueue_head(&TxBlock[k].gcwait); | 291 | init_waitqueue_head(&TxBlock[k].gcwait); |
292 | init_waitqueue_head(&TxBlock[k].waitor); | 292 | init_waitqueue_head(&TxBlock[k].waitor); |
293 | } | 293 | } |
294 | TxBlock[k].next = 0; | 294 | TxBlock[k].next = 0; |
295 | init_waitqueue_head(&TxBlock[k].gcwait); | 295 | init_waitqueue_head(&TxBlock[k].gcwait); |
296 | init_waitqueue_head(&TxBlock[k].waitor); | 296 | init_waitqueue_head(&TxBlock[k].waitor); |
297 | 297 | ||
298 | TxAnchor.freetid = 1; | 298 | TxAnchor.freetid = 1; |
299 | init_waitqueue_head(&TxAnchor.freewait); | 299 | init_waitqueue_head(&TxAnchor.freewait); |
300 | 300 | ||
301 | stattx.maxtid = 1; /* statistics */ | 301 | stattx.maxtid = 1; /* statistics */ |
302 | 302 | ||
303 | /* | 303 | /* |
304 | * initialize transaction lock (tlock) table | 304 | * initialize transaction lock (tlock) table |
305 | * | 305 | * |
306 | * transaction lock id = tlock index | 306 | * transaction lock id = tlock index |
307 | * tlock id = 0 is reserved. | 307 | * tlock id = 0 is reserved. |
308 | */ | 308 | */ |
309 | size = sizeof(struct tlock) * nTxLock; | 309 | size = sizeof(struct tlock) * nTxLock; |
310 | TxLock = (struct tlock *) vmalloc(size); | 310 | TxLock = (struct tlock *) vmalloc(size); |
311 | if (TxLock == NULL) { | 311 | if (TxLock == NULL) { |
312 | vfree(TxBlock); | 312 | vfree(TxBlock); |
313 | return -ENOMEM; | 313 | return -ENOMEM; |
314 | } | 314 | } |
315 | 315 | ||
316 | /* initialize tlock table */ | 316 | /* initialize tlock table */ |
317 | for (k = 1; k < nTxLock - 1; k++) | 317 | for (k = 1; k < nTxLock - 1; k++) |
318 | TxLock[k].next = k + 1; | 318 | TxLock[k].next = k + 1; |
319 | TxLock[k].next = 0; | 319 | TxLock[k].next = 0; |
320 | init_waitqueue_head(&TxAnchor.freelockwait); | 320 | init_waitqueue_head(&TxAnchor.freelockwait); |
321 | init_waitqueue_head(&TxAnchor.lowlockwait); | 321 | init_waitqueue_head(&TxAnchor.lowlockwait); |
322 | 322 | ||
323 | TxAnchor.freelock = 1; | 323 | TxAnchor.freelock = 1; |
324 | TxAnchor.tlocksInUse = 0; | 324 | TxAnchor.tlocksInUse = 0; |
325 | INIT_LIST_HEAD(&TxAnchor.anon_list); | 325 | INIT_LIST_HEAD(&TxAnchor.anon_list); |
326 | INIT_LIST_HEAD(&TxAnchor.anon_list2); | 326 | INIT_LIST_HEAD(&TxAnchor.anon_list2); |
327 | 327 | ||
328 | LAZY_LOCK_INIT(); | 328 | LAZY_LOCK_INIT(); |
329 | INIT_LIST_HEAD(&TxAnchor.unlock_queue); | 329 | INIT_LIST_HEAD(&TxAnchor.unlock_queue); |
330 | 330 | ||
331 | stattx.maxlid = 1; /* statistics */ | 331 | stattx.maxlid = 1; /* statistics */ |
332 | 332 | ||
333 | return 0; | 333 | return 0; |
334 | } | 334 | } |
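To make the defaults above concrete (assuming 4 KB pages, so si.totalram > 256 * 1024 pages means more than 1 GB of RAM): such a machine gets nTxLock = 65536, hence nTxBlock = 65536 >> 3 = 8192, and the watermarks come out to TxLockLWM = 26214, TxLockHWM = 45875 and TxLockVHWM = 52428 (40%, 70% and 80% of nTxLock).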
335 | 335 | ||
336 | /* | 336 | /* |
337 | * NAME: txExit() | 337 | * NAME: txExit() |
338 | * | 338 | * |
339 | * FUNCTION: clean up when module is unloaded | 339 | * FUNCTION: clean up when module is unloaded |
340 | */ | 340 | */ |
341 | void txExit(void) | 341 | void txExit(void) |
342 | { | 342 | { |
343 | vfree(TxLock); | 343 | vfree(TxLock); |
344 | TxLock = NULL; | 344 | TxLock = NULL; |
345 | vfree(TxBlock); | 345 | vfree(TxBlock); |
346 | TxBlock = NULL; | 346 | TxBlock = NULL; |
347 | } | 347 | } |
348 | 348 | ||
349 | /* | 349 | /* |
350 | * NAME: txBegin() | 350 | * NAME: txBegin() |
351 | * | 351 | * |
352 | * FUNCTION: start a transaction. | 352 | * FUNCTION: start a transaction. |
353 | * | 353 | * |
354 | * PARAMETER: sb - superblock | 354 | * PARAMETER: sb - superblock |
355 | * flag - force for nested tx; | 355 | * flag - force for nested tx; |
356 | * | 356 | * |
357 | * RETURN: tid - transaction id | 357 | * RETURN: tid - transaction id |
358 | * | 358 | * |
359 | * note: the COMMIT_FORCE flag allows a tx to be started for a nested tx | 359 | * note: the COMMIT_FORCE flag allows a tx to be started for a nested tx |
360 | * to prevent deadlock on the logsync barrier; | 360 | * to prevent deadlock on the logsync barrier; |
361 | */ | 361 | */ |
362 | tid_t txBegin(struct super_block *sb, int flag) | 362 | tid_t txBegin(struct super_block *sb, int flag) |
363 | { | 363 | { |
364 | tid_t t; | 364 | tid_t t; |
365 | struct tblock *tblk; | 365 | struct tblock *tblk; |
366 | struct jfs_log *log; | 366 | struct jfs_log *log; |
367 | 367 | ||
368 | jfs_info("txBegin: flag = 0x%x", flag); | 368 | jfs_info("txBegin: flag = 0x%x", flag); |
369 | log = JFS_SBI(sb)->log; | 369 | log = JFS_SBI(sb)->log; |
370 | 370 | ||
371 | TXN_LOCK(); | 371 | TXN_LOCK(); |
372 | 372 | ||
373 | INCREMENT(TxStat.txBegin); | 373 | INCREMENT(TxStat.txBegin); |
374 | 374 | ||
375 | retry: | 375 | retry: |
376 | if (!(flag & COMMIT_FORCE)) { | 376 | if (!(flag & COMMIT_FORCE)) { |
377 | /* | 377 | /* |
378 | * synchronize with logsync barrier | 378 | * synchronize with logsync barrier |
379 | */ | 379 | */ |
380 | if (test_bit(log_SYNCBARRIER, &log->flag) || | 380 | if (test_bit(log_SYNCBARRIER, &log->flag) || |
381 | test_bit(log_QUIESCE, &log->flag)) { | 381 | test_bit(log_QUIESCE, &log->flag)) { |
382 | INCREMENT(TxStat.txBegin_barrier); | 382 | INCREMENT(TxStat.txBegin_barrier); |
383 | TXN_SLEEP(&log->syncwait); | 383 | TXN_SLEEP(&log->syncwait); |
384 | goto retry; | 384 | goto retry; |
385 | } | 385 | } |
386 | } | 386 | } |
387 | if (flag == 0) { | 387 | if (flag == 0) { |
388 | /* | 388 | /* |
389 | * Don't begin transaction if we're getting starved for tlocks | 389 | * Don't begin transaction if we're getting starved for tlocks |
390 | * unless COMMIT_FORCE or COMMIT_INODE (which may ultimately | 390 | * unless COMMIT_FORCE or COMMIT_INODE (which may ultimately |
391 | * free tlocks) | 391 | * free tlocks) |
392 | */ | 392 | */ |
393 | if (TxAnchor.tlocksInUse > TxLockVHWM) { | 393 | if (TxAnchor.tlocksInUse > TxLockVHWM) { |
394 | INCREMENT(TxStat.txBegin_lockslow); | 394 | INCREMENT(TxStat.txBegin_lockslow); |
395 | TXN_SLEEP(&TxAnchor.lowlockwait); | 395 | TXN_SLEEP(&TxAnchor.lowlockwait); |
396 | goto retry; | 396 | goto retry; |
397 | } | 397 | } |
398 | } | 398 | } |
399 | 399 | ||
400 | /* | 400 | /* |
401 | * allocate transaction id/block | 401 | * allocate transaction id/block |
402 | */ | 402 | */ |
403 | if ((t = TxAnchor.freetid) == 0) { | 403 | if ((t = TxAnchor.freetid) == 0) { |
404 | jfs_info("txBegin: waiting for free tid"); | 404 | jfs_info("txBegin: waiting for free tid"); |
405 | INCREMENT(TxStat.txBegin_freetid); | 405 | INCREMENT(TxStat.txBegin_freetid); |
406 | TXN_SLEEP(&TxAnchor.freewait); | 406 | TXN_SLEEP(&TxAnchor.freewait); |
407 | goto retry; | 407 | goto retry; |
408 | } | 408 | } |
409 | 409 | ||
410 | tblk = tid_to_tblock(t); | 410 | tblk = tid_to_tblock(t); |
411 | 411 | ||
412 | if ((tblk->next == 0) && !(flag & COMMIT_FORCE)) { | 412 | if ((tblk->next == 0) && !(flag & COMMIT_FORCE)) { |
413 | /* Don't let a non-forced transaction take the last tblk */ | 413 | /* Don't let a non-forced transaction take the last tblk */ |
414 | jfs_info("txBegin: waiting for free tid"); | 414 | jfs_info("txBegin: waiting for free tid"); |
415 | INCREMENT(TxStat.txBegin_freetid); | 415 | INCREMENT(TxStat.txBegin_freetid); |
416 | TXN_SLEEP(&TxAnchor.freewait); | 416 | TXN_SLEEP(&TxAnchor.freewait); |
417 | goto retry; | 417 | goto retry; |
418 | } | 418 | } |
419 | 419 | ||
420 | TxAnchor.freetid = tblk->next; | 420 | TxAnchor.freetid = tblk->next; |
421 | 421 | ||
422 | /* | 422 | /* |
423 | * initialize transaction | 423 | * initialize transaction |
424 | */ | 424 | */ |
425 | 425 | ||
426 | /* | 426 | /* |
427 | * We can't zero the whole thing or we screw up another thread being | 427 | * We can't zero the whole thing or we screw up another thread being |
428 | * awakened after sleeping on tblk->waitor | 428 | * awakened after sleeping on tblk->waitor |
429 | * | 429 | * |
430 | * memset(tblk, 0, sizeof(struct tblock)); | 430 | * memset(tblk, 0, sizeof(struct tblock)); |
431 | */ | 431 | */ |
432 | tblk->next = tblk->last = tblk->xflag = tblk->flag = tblk->lsn = 0; | 432 | tblk->next = tblk->last = tblk->xflag = tblk->flag = tblk->lsn = 0; |
433 | 433 | ||
434 | tblk->sb = sb; | 434 | tblk->sb = sb; |
435 | ++log->logtid; | 435 | ++log->logtid; |
436 | tblk->logtid = log->logtid; | 436 | tblk->logtid = log->logtid; |
437 | 437 | ||
438 | ++log->active; | 438 | ++log->active; |
439 | 439 | ||
440 | HIGHWATERMARK(stattx.maxtid, t); /* statistics */ | 440 | HIGHWATERMARK(stattx.maxtid, t); /* statistics */ |
441 | INCREMENT(stattx.ntid); /* statistics */ | 441 | INCREMENT(stattx.ntid); /* statistics */ |
442 | 442 | ||
443 | TXN_UNLOCK(); | 443 | TXN_UNLOCK(); |
444 | 444 | ||
445 | jfs_info("txBegin: returning tid = %d", t); | 445 | jfs_info("txBegin: returning tid = %d", t); |
446 | 446 | ||
447 | return t; | 447 | return t; |
448 | } | 448 | } |
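A minimal sketch of the caller pattern implied by the notes at the top of this file; txCommit()'s signature shown here matches its definition elsewhere in jfs_txnmgr.c, but treat the whole fragment as illustrative:

	tid_t tid;
	struct inode *iplist[1] = { ip };

	tid = txBegin(ip->i_sb, 0);
	/* ... txLock() each metapage being modified, make the updates ... */
	txCommit(tid, 1, iplist, 0);	/* or txAbort() on failure */
	txEnd(tid);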
449 | 449 | ||
450 | /* | 450 | /* |
451 | * NAME: txBeginAnon() | 451 | * NAME: txBeginAnon() |
452 | * | 452 | * |
453 | * FUNCTION: start an anonymous transaction. | 453 | * FUNCTION: start an anonymous transaction. |
454 | * Blocks if logsync or available tlocks are low to prevent | 454 | * Blocks if logsync or available tlocks are low to prevent |
455 | * anonymous tlocks from depleting supply. | 455 | * anonymous tlocks from depleting supply. |
456 | * | 456 | * |
457 | * PARAMETER: sb - superblock | 457 | * PARAMETER: sb - superblock |
458 | * | 458 | * |
459 | * RETURN: none | 459 | * RETURN: none |
460 | */ | 460 | */ |
461 | void txBeginAnon(struct super_block *sb) | 461 | void txBeginAnon(struct super_block *sb) |
462 | { | 462 | { |
463 | struct jfs_log *log; | 463 | struct jfs_log *log; |
464 | 464 | ||
465 | log = JFS_SBI(sb)->log; | 465 | log = JFS_SBI(sb)->log; |
466 | 466 | ||
467 | TXN_LOCK(); | 467 | TXN_LOCK(); |
468 | INCREMENT(TxStat.txBeginAnon); | 468 | INCREMENT(TxStat.txBeginAnon); |
469 | 469 | ||
470 | retry: | 470 | retry: |
471 | /* | 471 | /* |
472 | * synchronize with logsync barrier | 472 | * synchronize with logsync barrier |
473 | */ | 473 | */ |
474 | if (test_bit(log_SYNCBARRIER, &log->flag) || | 474 | if (test_bit(log_SYNCBARRIER, &log->flag) || |
475 | test_bit(log_QUIESCE, &log->flag)) { | 475 | test_bit(log_QUIESCE, &log->flag)) { |
476 | INCREMENT(TxStat.txBeginAnon_barrier); | 476 | INCREMENT(TxStat.txBeginAnon_barrier); |
477 | TXN_SLEEP(&log->syncwait); | 477 | TXN_SLEEP(&log->syncwait); |
478 | goto retry; | 478 | goto retry; |
479 | } | 479 | } |
480 | 480 | ||
481 | /* | 481 | /* |
482 | * Don't begin transaction if we're getting starved for tlocks | 482 | * Don't begin transaction if we're getting starved for tlocks |
483 | */ | 483 | */ |
484 | if (TxAnchor.tlocksInUse > TxLockVHWM) { | 484 | if (TxAnchor.tlocksInUse > TxLockVHWM) { |
485 | INCREMENT(TxStat.txBeginAnon_lockslow); | 485 | INCREMENT(TxStat.txBeginAnon_lockslow); |
486 | TXN_SLEEP(&TxAnchor.lowlockwait); | 486 | TXN_SLEEP(&TxAnchor.lowlockwait); |
487 | goto retry; | 487 | goto retry; |
488 | } | 488 | } |
489 | TXN_UNLOCK(); | 489 | TXN_UNLOCK(); |
490 | } | 490 | } |
491 | 491 | ||
492 | /* | 492 | /* |
493 | * txEnd() | 493 | * txEnd() |
494 | * | 494 | * |
495 | * function: free specified transaction block. | 495 | * function: free specified transaction block. |
496 | * | 496 | * |
497 | * logsync barrier processing: when the last active transaction ends | 497 | * logsync barrier processing: when the last active transaction ends |
498 | * under log_SYNCBARRIER, the barrier is completed below; | 498 | * under log_SYNCBARRIER, the barrier is completed below; |
499 | * serialization: TXN_LOCK is held across the freelist and log updates. | 499 | * serialization: TXN_LOCK is held across the freelist and log updates. |
500 | */ | 500 | */ |
501 | void txEnd(tid_t tid) | 501 | void txEnd(tid_t tid) |
502 | { | 502 | { |
503 | struct tblock *tblk = tid_to_tblock(tid); | 503 | struct tblock *tblk = tid_to_tblock(tid); |
504 | struct jfs_log *log; | 504 | struct jfs_log *log; |
505 | 505 | ||
506 | jfs_info("txEnd: tid = %d", tid); | 506 | jfs_info("txEnd: tid = %d", tid); |
507 | TXN_LOCK(); | 507 | TXN_LOCK(); |
508 | 508 | ||
509 | /* | 509 | /* |
510 | * wakeup transactions waiting on the page locked | 510 | * wakeup transactions waiting on the page locked |
511 | * by the current transaction | 511 | * by the current transaction |
512 | */ | 512 | */ |
513 | TXN_WAKEUP(&tblk->waitor); | 513 | TXN_WAKEUP(&tblk->waitor); |
514 | 514 | ||
515 | log = JFS_SBI(tblk->sb)->log; | 515 | log = JFS_SBI(tblk->sb)->log; |
516 | 516 | ||
517 | /* | 517 | /* |
518 | * Lazy commit thread can't free this guy until we mark it UNLOCKED, | 518 | * Lazy commit thread can't free this guy until we mark it UNLOCKED, |
519 | * otherwise, we would be left with a transaction that may have been | 519 | * otherwise, we would be left with a transaction that may have been |
520 | * reused. | 520 | * reused. |
521 | * | 521 | * |
522 | * Lazy commit thread will turn off tblkGC_LAZY before calling this | 522 | * Lazy commit thread will turn off tblkGC_LAZY before calling this |
523 | * routine. | 523 | * routine. |
524 | */ | 524 | */ |
525 | if (tblk->flag & tblkGC_LAZY) { | 525 | if (tblk->flag & tblkGC_LAZY) { |
526 | jfs_info("txEnd called w/lazy tid: %d, tblk = 0x%p", tid, tblk); | 526 | jfs_info("txEnd called w/lazy tid: %d, tblk = 0x%p", tid, tblk); |
527 | TXN_UNLOCK(); | 527 | TXN_UNLOCK(); |
528 | 528 | ||
529 | spin_lock_irq(&log->gclock); // LOGGC_LOCK | 529 | spin_lock_irq(&log->gclock); // LOGGC_LOCK |
530 | tblk->flag |= tblkGC_UNLOCKED; | 530 | tblk->flag |= tblkGC_UNLOCKED; |
531 | spin_unlock_irq(&log->gclock); // LOGGC_UNLOCK | 531 | spin_unlock_irq(&log->gclock); // LOGGC_UNLOCK |
532 | return; | 532 | return; |
533 | } | 533 | } |
534 | 534 | ||
535 | jfs_info("txEnd: tid: %d, tblk = 0x%p", tid, tblk); | 535 | jfs_info("txEnd: tid: %d, tblk = 0x%p", tid, tblk); |
536 | 536 | ||
537 | assert(tblk->next == 0); | 537 | assert(tblk->next == 0); |
538 | 538 | ||
539 | /* | 539 | /* |
540 | * insert tblock back on freelist | 540 | * insert tblock back on freelist |
541 | */ | 541 | */ |
542 | tblk->next = TxAnchor.freetid; | 542 | tblk->next = TxAnchor.freetid; |
543 | TxAnchor.freetid = tid; | 543 | TxAnchor.freetid = tid; |
544 | 544 | ||
545 | /* | 545 | /* |
546 | * mark the tblock not active | 546 | * mark the tblock not active |
547 | */ | 547 | */ |
548 | if (--log->active == 0) { | 548 | if (--log->active == 0) { |
549 | clear_bit(log_FLUSH, &log->flag); | 549 | clear_bit(log_FLUSH, &log->flag); |
550 | 550 | ||
551 | /* | 551 | /* |
552 | * synchronize with logsync barrier | 552 | * synchronize with logsync barrier |
553 | */ | 553 | */ |
554 | if (test_bit(log_SYNCBARRIER, &log->flag)) { | 554 | if (test_bit(log_SYNCBARRIER, &log->flag)) { |
555 | TXN_UNLOCK(); | ||
556 | |||
557 | /* write dirty metadata & forward log syncpt */ | ||
558 | jfs_syncpt(log, 1); | ||
559 | |||
555 | jfs_info("log barrier off: 0x%x", log->lsn); | 560 | jfs_info("log barrier off: 0x%x", log->lsn); |
556 | 561 | ||
557 | /* enable new transactions start */ | 562 | /* enable new transactions start */ |
558 | clear_bit(log_SYNCBARRIER, &log->flag); | 563 | clear_bit(log_SYNCBARRIER, &log->flag); |
559 | 564 | ||
560 | /* wakeup all waiters for logsync barrier */ | 565 | /* wakeup all waiters for logsync barrier */ |
561 | TXN_WAKEUP(&log->syncwait); | 566 | TXN_WAKEUP(&log->syncwait); |
562 | |||
563 | TXN_UNLOCK(); | ||
564 | |||
565 | /* forward log syncpt */ | ||
566 | jfs_syncpt(log); | ||
567 | 567 | ||
568 | goto wakeup; | 568 | goto wakeup; |
569 | } | 569 | } |
570 | } | 570 | } |
571 | 571 | ||
572 | TXN_UNLOCK(); | 572 | TXN_UNLOCK(); |
573 | wakeup: | 573 | wakeup: |
574 | /* | 574 | /* |
575 | * wakeup all waiters for a free tblock | 575 | * wakeup all waiters for a free tblock |
576 | */ | 576 | */ |
577 | TXN_WAKEUP(&TxAnchor.freewait); | 577 | TXN_WAKEUP(&TxAnchor.freewait); |
578 | } | 578 | } |
579 | 579 | ||
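The freelist insert just above (tblk->next / TxAnchor.freetid) is a classic index-linked LIFO: the tblock's own next field is the link, and tid 0 doubles as the list terminator. A minimal user-space sketch of that structure, with illustrative names and sizes rather than the kernel's:

#include <assert.h>
#include <stdio.h>

/* Hypothetical, simplified model of the tid-indexed tblock freelist:
 * txEnd() pushes a finished tblock back on a LIFO list headed by
 * TxAnchor.freetid, linking through the tblock's own 'next' field.
 * Table size and names here are illustrative, not the kernel's. */
#define NR_TBLOCKS 8

struct tblock_model { unsigned short next; };

static struct tblock_model tblocks[NR_TBLOCKS];
static unsigned short freetid;            /* 0 means "list empty" */

static void free_tid(unsigned short tid)
{
	assert(tid != 0 && tid < NR_TBLOCKS);
	tblocks[tid].next = freetid;      /* link in front of old head */
	freetid = tid;                    /* new head of the freelist */
}

static unsigned short alloc_tid(void)
{
	unsigned short tid = freetid;
	if (tid)
		freetid = tblocks[tid].next;  /* pop the head */
	return tid;                           /* 0 if the list was empty */
}

int main(void)
{
	for (unsigned short t = 1; t < NR_TBLOCKS; t++)
		free_tid(t);                  /* seed the freelist */
	printf("allocated tid %u\n", alloc_tid());
	return 0;
}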
580 | /* | 580 | /* |
581 | * txLock() | 581 | * txLock() |
582 | * | 582 | * |
583 | * function: acquire a transaction lock on the specified <mp> | 583 | * function: acquire a transaction lock on the specified <mp> |
584 | * | 584 | * |
585 | * parameter: | 585 | * parameter: |
586 | * | 586 | * |
587 | * return: transaction lock id | 587 | * return: transaction lock id |
588 | * | 588 | * |
589 | * serialization: | 589 | * serialization: |
590 | */ | 590 | */ |
591 | struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp, | 591 | struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp, |
592 | int type) | 592 | int type) |
593 | { | 593 | { |
594 | struct jfs_inode_info *jfs_ip = JFS_IP(ip); | 594 | struct jfs_inode_info *jfs_ip = JFS_IP(ip); |
595 | int dir_xtree = 0; | 595 | int dir_xtree = 0; |
596 | lid_t lid; | 596 | lid_t lid; |
597 | tid_t xtid; | 597 | tid_t xtid; |
598 | struct tlock *tlck; | 598 | struct tlock *tlck; |
599 | struct xtlock *xtlck; | 599 | struct xtlock *xtlck; |
600 | struct linelock *linelock; | 600 | struct linelock *linelock; |
601 | xtpage_t *p; | 601 | xtpage_t *p; |
602 | struct tblock *tblk; | 602 | struct tblock *tblk; |
603 | 603 | ||
604 | TXN_LOCK(); | 604 | TXN_LOCK(); |
605 | 605 | ||
606 | if (S_ISDIR(ip->i_mode) && (type & tlckXTREE) && | 606 | if (S_ISDIR(ip->i_mode) && (type & tlckXTREE) && |
607 | !(mp->xflag & COMMIT_PAGE)) { | 607 | !(mp->xflag & COMMIT_PAGE)) { |
608 | /* | 608 | /* |
609 | * Directory inode is special. It can have both an xtree tlock | 609 | * Directory inode is special. It can have both an xtree tlock |
610 | * and a dtree tlock associated with it. | 610 | * and a dtree tlock associated with it. |
611 | */ | 611 | */ |
612 | dir_xtree = 1; | 612 | dir_xtree = 1; |
613 | lid = jfs_ip->xtlid; | 613 | lid = jfs_ip->xtlid; |
614 | } else | 614 | } else |
615 | lid = mp->lid; | 615 | lid = mp->lid; |
616 | 616 | ||
617 | /* is page not locked by a transaction ? */ | 617 | /* is page not locked by a transaction ? */ |
618 | if (lid == 0) | 618 | if (lid == 0) |
619 | goto allocateLock; | 619 | goto allocateLock; |
620 | 620 | ||
621 | jfs_info("txLock: tid:%d ip:0x%p mp:0x%p lid:%d", tid, ip, mp, lid); | 621 | jfs_info("txLock: tid:%d ip:0x%p mp:0x%p lid:%d", tid, ip, mp, lid); |
622 | 622 | ||
623 | /* is page locked by the requester transaction ? */ | 623 | /* is page locked by the requester transaction ? */ |
624 | tlck = lid_to_tlock(lid); | 624 | tlck = lid_to_tlock(lid); |
625 | if ((xtid = tlck->tid) == tid) { | 625 | if ((xtid = tlck->tid) == tid) { |
626 | TXN_UNLOCK(); | 626 | TXN_UNLOCK(); |
627 | goto grantLock; | 627 | goto grantLock; |
628 | } | 628 | } |
629 | 629 | ||
630 | /* | 630 | /* |
631 | * is page locked by anonymous transaction/lock ? | 631 | * is page locked by anonymous transaction/lock ? |
632 | * | 632 | * |
633 | * (page update without transaction (i.e., file write) is | 633 | * (page update without transaction (i.e., file write) is |
634 | * locked under anonymous transaction tid = 0: | 634 | * locked under anonymous transaction tid = 0: |
635 | * anonymous tlocks maintained on anonymous tlock list of | 635 | * anonymous tlocks maintained on anonymous tlock list of |
636 | * the inode of the page and available to all anonymous | 636 | * the inode of the page and available to all anonymous |
637 | * transactions until txCommit() time at which point | 637 | * transactions until txCommit() time at which point |
638 | * they are transferred to the transaction tlock list of | 638 | * they are transferred to the transaction tlock list of |
639 | * the committing transaction of the inode) | 639 | * the committing transaction of the inode) |
640 | */ | 640 | */ |
641 | if (xtid == 0) { | 641 | if (xtid == 0) { |
642 | tlck->tid = tid; | 642 | tlck->tid = tid; |
643 | TXN_UNLOCK(); | 643 | TXN_UNLOCK(); |
644 | tblk = tid_to_tblock(tid); | 644 | tblk = tid_to_tblock(tid); |
645 | /* | 645 | /* |
646 | * The order of the tlocks in the transaction is important | 646 | * The order of the tlocks in the transaction is important |
647 | * (during truncate, child xtree pages must be freed before | 647 | * (during truncate, child xtree pages must be freed before |
648 | * parent's tlocks change the working map). | 648 | * parent's tlocks change the working map). |
649 | * Take tlock off anonymous list and add to tail of | 649 | * Take tlock off anonymous list and add to tail of |
650 | * transaction list | 650 | * transaction list |
651 | * | 651 | * |
652 | * Note: We really need to get rid of the tid & lid and | 652 | * Note: We really need to get rid of the tid & lid and |
653 | * use list_head's. This code is getting UGLY! | 653 | * use list_head's. This code is getting UGLY! |
654 | */ | 654 | */ |
655 | if (jfs_ip->atlhead == lid) { | 655 | if (jfs_ip->atlhead == lid) { |
656 | if (jfs_ip->atltail == lid) { | 656 | if (jfs_ip->atltail == lid) { |
657 | /* only anonymous txn. | 657 | /* only anonymous txn. |
658 | * Remove from anon_list | 658 | * Remove from anon_list |
659 | */ | 659 | */ |
660 | list_del_init(&jfs_ip->anon_inode_list); | 660 | list_del_init(&jfs_ip->anon_inode_list); |
661 | } | 661 | } |
662 | jfs_ip->atlhead = tlck->next; | 662 | jfs_ip->atlhead = tlck->next; |
663 | } else { | 663 | } else { |
664 | lid_t last; | 664 | lid_t last; |
665 | for (last = jfs_ip->atlhead; | 665 | for (last = jfs_ip->atlhead; |
666 | lid_to_tlock(last)->next != lid; | 666 | lid_to_tlock(last)->next != lid; |
667 | last = lid_to_tlock(last)->next) { | 667 | last = lid_to_tlock(last)->next) { |
668 | assert(last); | 668 | assert(last); |
669 | } | 669 | } |
670 | lid_to_tlock(last)->next = tlck->next; | 670 | lid_to_tlock(last)->next = tlck->next; |
671 | if (jfs_ip->atltail == lid) | 671 | if (jfs_ip->atltail == lid) |
672 | jfs_ip->atltail = last; | 672 | jfs_ip->atltail = last; |
673 | } | 673 | } |
674 | 674 | ||
675 | /* insert the tlock at tail of transaction tlock list */ | 675 | /* insert the tlock at tail of transaction tlock list */ |
676 | 676 | ||
677 | if (tblk->next) | 677 | if (tblk->next) |
678 | lid_to_tlock(tblk->last)->next = lid; | 678 | lid_to_tlock(tblk->last)->next = lid; |
679 | else | 679 | else |
680 | tblk->next = lid; | 680 | tblk->next = lid; |
681 | tlck->next = 0; | 681 | tlck->next = 0; |
682 | tblk->last = lid; | 682 | tblk->last = lid; |
683 | 683 | ||
684 | goto grantLock; | 684 | goto grantLock; |
685 | } | 685 | } |
686 | 686 | ||
687 | goto waitLock; | 687 | goto waitLock; |
688 | 688 | ||
689 | /* | 689 | /* |
690 | * allocate a tlock | 690 | * allocate a tlock |
691 | */ | 691 | */ |
692 | allocateLock: | 692 | allocateLock: |
693 | lid = txLockAlloc(); | 693 | lid = txLockAlloc(); |
694 | tlck = lid_to_tlock(lid); | 694 | tlck = lid_to_tlock(lid); |
695 | 695 | ||
696 | /* | 696 | /* |
697 | * initialize tlock | 697 | * initialize tlock |
698 | */ | 698 | */ |
699 | tlck->tid = tid; | 699 | tlck->tid = tid; |
700 | 700 | ||
701 | TXN_UNLOCK(); | 701 | TXN_UNLOCK(); |
702 | 702 | ||
703 | /* mark tlock for meta-data page */ | 703 | /* mark tlock for meta-data page */ |
704 | if (mp->xflag & COMMIT_PAGE) { | 704 | if (mp->xflag & COMMIT_PAGE) { |
705 | 705 | ||
706 | tlck->flag = tlckPAGELOCK; | 706 | tlck->flag = tlckPAGELOCK; |
707 | 707 | ||
708 | /* mark the page dirty and nohomeok */ | 708 | /* mark the page dirty and nohomeok */ |
709 | metapage_nohomeok(mp); | 709 | metapage_nohomeok(mp); |
710 | 710 | ||
711 | jfs_info("locking mp = 0x%p, nohomeok = %d tid = %d tlck = 0x%p", | 711 | jfs_info("locking mp = 0x%p, nohomeok = %d tid = %d tlck = 0x%p", |
712 | mp, mp->nohomeok, tid, tlck); | 712 | mp, mp->nohomeok, tid, tlck); |
713 | 713 | ||
714 | /* if anonymous transaction, and buffer is on the group | 714 | /* if anonymous transaction, and buffer is on the group |
715 | * commit synclist, mark inode to show this. This will | 715 | * commit synclist, mark inode to show this. This will |
716 | * prevent the buffer from being marked nohomeok for too | 716 | * prevent the buffer from being marked nohomeok for too |
717 | * long a time. | 717 | * long a time. |
718 | */ | 718 | */ |
719 | if ((tid == 0) && mp->lsn) | 719 | if ((tid == 0) && mp->lsn) |
720 | set_cflag(COMMIT_Synclist, ip); | 720 | set_cflag(COMMIT_Synclist, ip); |
721 | } | 721 | } |
722 | /* mark tlock for in-memory inode */ | 722 | /* mark tlock for in-memory inode */ |
723 | else | 723 | else |
724 | tlck->flag = tlckINODELOCK; | 724 | tlck->flag = tlckINODELOCK; |
725 | 725 | ||
726 | tlck->type = 0; | 726 | tlck->type = 0; |
727 | 727 | ||
728 | /* bind the tlock and the page */ | 728 | /* bind the tlock and the page */ |
729 | tlck->ip = ip; | 729 | tlck->ip = ip; |
730 | tlck->mp = mp; | 730 | tlck->mp = mp; |
731 | if (dir_xtree) | 731 | if (dir_xtree) |
732 | jfs_ip->xtlid = lid; | 732 | jfs_ip->xtlid = lid; |
733 | else | 733 | else |
734 | mp->lid = lid; | 734 | mp->lid = lid; |
735 | 735 | ||
736 | /* | 736 | /* |
737 | * enqueue transaction lock to transaction/inode | 737 | * enqueue transaction lock to transaction/inode |
738 | */ | 738 | */ |
739 | /* insert the tlock at tail of transaction tlock list */ | 739 | /* insert the tlock at tail of transaction tlock list */ |
740 | if (tid) { | 740 | if (tid) { |
741 | tblk = tid_to_tblock(tid); | 741 | tblk = tid_to_tblock(tid); |
742 | if (tblk->next) | 742 | if (tblk->next) |
743 | lid_to_tlock(tblk->last)->next = lid; | 743 | lid_to_tlock(tblk->last)->next = lid; |
744 | else | 744 | else |
745 | tblk->next = lid; | 745 | tblk->next = lid; |
746 | tlck->next = 0; | 746 | tlck->next = 0; |
747 | tblk->last = lid; | 747 | tblk->last = lid; |
748 | } | 748 | } |
749 | /* anonymous transaction: | 749 | /* anonymous transaction: |
750 | * insert the tlock at head of inode anonymous tlock list | 750 | * insert the tlock at head of inode anonymous tlock list |
751 | */ | 751 | */ |
752 | else { | 752 | else { |
753 | tlck->next = jfs_ip->atlhead; | 753 | tlck->next = jfs_ip->atlhead; |
754 | jfs_ip->atlhead = lid; | 754 | jfs_ip->atlhead = lid; |
755 | if (tlck->next == 0) { | 755 | if (tlck->next == 0) { |
756 | /* This inode's first anonymous transaction */ | 756 | /* This inode's first anonymous transaction */ |
757 | jfs_ip->atltail = lid; | 757 | jfs_ip->atltail = lid; |
758 | TXN_LOCK(); | 758 | TXN_LOCK(); |
759 | list_add_tail(&jfs_ip->anon_inode_list, | 759 | list_add_tail(&jfs_ip->anon_inode_list, |
760 | &TxAnchor.anon_list); | 760 | &TxAnchor.anon_list); |
761 | TXN_UNLOCK(); | 761 | TXN_UNLOCK(); |
762 | } | 762 | } |
763 | } | 763 | } |
764 | 764 | ||
765 | /* initialize type dependent area for linelock */ | 765 | /* initialize type dependent area for linelock */ |
766 | linelock = (struct linelock *) & tlck->lock; | 766 | linelock = (struct linelock *) & tlck->lock; |
767 | linelock->next = 0; | 767 | linelock->next = 0; |
768 | linelock->flag = tlckLINELOCK; | 768 | linelock->flag = tlckLINELOCK; |
769 | linelock->maxcnt = TLOCKSHORT; | 769 | linelock->maxcnt = TLOCKSHORT; |
770 | linelock->index = 0; | 770 | linelock->index = 0; |
771 | 771 | ||
772 | switch (type & tlckTYPE) { | 772 | switch (type & tlckTYPE) { |
773 | case tlckDTREE: | 773 | case tlckDTREE: |
774 | linelock->l2linesize = L2DTSLOTSIZE; | 774 | linelock->l2linesize = L2DTSLOTSIZE; |
775 | break; | 775 | break; |
776 | 776 | ||
777 | case tlckXTREE: | 777 | case tlckXTREE: |
778 | linelock->l2linesize = L2XTSLOTSIZE; | 778 | linelock->l2linesize = L2XTSLOTSIZE; |
779 | 779 | ||
780 | xtlck = (struct xtlock *) linelock; | 780 | xtlck = (struct xtlock *) linelock; |
781 | xtlck->header.offset = 0; | 781 | xtlck->header.offset = 0; |
782 | xtlck->header.length = 2; | 782 | xtlck->header.length = 2; |
783 | 783 | ||
784 | if (type & tlckNEW) { | 784 | if (type & tlckNEW) { |
785 | xtlck->lwm.offset = XTENTRYSTART; | 785 | xtlck->lwm.offset = XTENTRYSTART; |
786 | } else { | 786 | } else { |
787 | if (mp->xflag & COMMIT_PAGE) | 787 | if (mp->xflag & COMMIT_PAGE) |
788 | p = (xtpage_t *) mp->data; | 788 | p = (xtpage_t *) mp->data; |
789 | else | 789 | else |
790 | p = &jfs_ip->i_xtroot; | 790 | p = &jfs_ip->i_xtroot; |
791 | xtlck->lwm.offset = | 791 | xtlck->lwm.offset = |
792 | le16_to_cpu(p->header.nextindex); | 792 | le16_to_cpu(p->header.nextindex); |
793 | } | 793 | } |
794 | xtlck->lwm.length = 0; /* ! */ | 794 | xtlck->lwm.length = 0; /* ! */ |
795 | xtlck->twm.offset = 0; | 795 | xtlck->twm.offset = 0; |
796 | xtlck->hwm.offset = 0; | 796 | xtlck->hwm.offset = 0; |
797 | 797 | ||
798 | xtlck->index = 2; | 798 | xtlck->index = 2; |
799 | break; | 799 | break; |
800 | 800 | ||
801 | case tlckINODE: | 801 | case tlckINODE: |
802 | linelock->l2linesize = L2INODESLOTSIZE; | 802 | linelock->l2linesize = L2INODESLOTSIZE; |
803 | break; | 803 | break; |
804 | 804 | ||
805 | case tlckDATA: | 805 | case tlckDATA: |
806 | linelock->l2linesize = L2DATASLOTSIZE; | 806 | linelock->l2linesize = L2DATASLOTSIZE; |
807 | break; | 807 | break; |
808 | 808 | ||
809 | default: | 809 | default: |
810 | jfs_err("UFO tlock:0x%p", tlck); | 810 | jfs_err("UFO tlock:0x%p", tlck); |
811 | } | 811 | } |
812 | 812 | ||
813 | /* | 813 | /* |
814 | * update tlock vector | 814 | * update tlock vector |
815 | */ | 815 | */ |
816 | grantLock: | 816 | grantLock: |
817 | tlck->type |= type; | 817 | tlck->type |= type; |
818 | 818 | ||
819 | return tlck; | 819 | return tlck; |
820 | 820 | ||
821 | /* | 821 | /* |
822 | * page is being locked by another transaction: | 822 | * page is being locked by another transaction: |
823 | */ | 823 | */ |
824 | waitLock: | 824 | waitLock: |
825 | /* Only locks on ipimap or ipaimap should reach here */ | 825 | /* Only locks on ipimap or ipaimap should reach here */ |
826 | /* assert(jfs_ip->fileset == AGGREGATE_I); */ | 826 | /* assert(jfs_ip->fileset == AGGREGATE_I); */ |
827 | if (jfs_ip->fileset != AGGREGATE_I) { | 827 | if (jfs_ip->fileset != AGGREGATE_I) { |
828 | jfs_err("txLock: trying to lock locked page!"); | 828 | jfs_err("txLock: trying to lock locked page!"); |
829 | dump_mem("ip", ip, sizeof(struct inode)); | 829 | dump_mem("ip", ip, sizeof(struct inode)); |
830 | dump_mem("mp", mp, sizeof(struct metapage)); | 830 | dump_mem("mp", mp, sizeof(struct metapage)); |
831 | dump_mem("Locker's tblk", tid_to_tblock(tid), | 831 | dump_mem("Locker's tblk", tid_to_tblock(tid), |
832 | sizeof(struct tblock)); | 832 | sizeof(struct tblock)); |
833 | dump_mem("Tlock", tlck, sizeof(struct tlock)); | 833 | dump_mem("Tlock", tlck, sizeof(struct tlock)); |
834 | BUG(); | 834 | BUG(); |
835 | } | 835 | } |
836 | INCREMENT(stattx.waitlock); /* statistics */ | 836 | INCREMENT(stattx.waitlock); /* statistics */ |
837 | TXN_UNLOCK(); | 837 | TXN_UNLOCK(); |
838 | release_metapage(mp); | 838 | release_metapage(mp); |
839 | TXN_LOCK(); | 839 | TXN_LOCK(); |
840 | xtid = tlck->tid; /* reacquire after dropping TXN_LOCK */ | 840 | xtid = tlck->tid; /* reacquire after dropping TXN_LOCK */ |
841 | 841 | ||
842 | jfs_info("txLock: in waitLock, tid = %d, xtid = %d, lid = %d", | 842 | jfs_info("txLock: in waitLock, tid = %d, xtid = %d, lid = %d", |
843 | tid, xtid, lid); | 843 | tid, xtid, lid); |
844 | 844 | ||
845 | /* Recheck everything since dropping TXN_LOCK */ | 845 | /* Recheck everything since dropping TXN_LOCK */ |
846 | if (xtid && (tlck->mp == mp) && (mp->lid == lid)) | 846 | if (xtid && (tlck->mp == mp) && (mp->lid == lid)) |
847 | TXN_SLEEP_DROP_LOCK(&tid_to_tblock(xtid)->waitor); | 847 | TXN_SLEEP_DROP_LOCK(&tid_to_tblock(xtid)->waitor); |
848 | else | 848 | else |
849 | TXN_UNLOCK(); | 849 | TXN_UNLOCK(); |
850 | jfs_info("txLock: awakened tid = %d, lid = %d", tid, lid); | 850 | jfs_info("txLock: awakened tid = %d, lid = %d", tid, lid); |
851 | 851 | ||
852 | return NULL; | 852 | return NULL; |
853 | } | 853 | } |
854 | 854 | ||
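The anonymous-list surgery inside txLock() (the atlhead/atltail block) is plain singly-linked-list removal over lid indices, including the walk-to-find-the-predecessor pass that the "This code is getting UGLY!" comment complains about. A self-contained sketch under the same 0-terminates-the-list convention; names and table size are illustrative:

#include <assert.h>
#include <stdio.h>

/* Hypothetical model of removing one lid from an inode's anonymous
 * tlock list: lids index a global tlock table, 0 ends a list, and a
 * non-head lid requires walking from atlhead to find its predecessor. */
#define NR_TLOCKS 16

struct tlock_model { unsigned short next; };

static struct tlock_model tlocks[NR_TLOCKS];

static void remove_lid(unsigned short *atlhead, unsigned short *atltail,
		       unsigned short lid)
{
	if (*atlhead == lid) {
		*atlhead = tlocks[lid].next;       /* unlink the head */
		if (*atltail == lid)
			*atltail = 0;              /* list is now empty */
	} else {
		unsigned short last;
		for (last = *atlhead; tlocks[last].next != lid;
		     last = tlocks[last].next)
			assert(last);              /* lid must be present */
		tlocks[last].next = tlocks[lid].next;
		if (*atltail == lid)
			*atltail = last;           /* removed the tail */
	}
}

int main(void)
{
	unsigned short head = 1, tail = 3;
	tlocks[1].next = 2; tlocks[2].next = 3; tlocks[3].next = 0;
	remove_lid(&head, &tail, 2);
	printf("head=%u tail=%u head->next=%u\n",
	       head, tail, tlocks[head].next);
	return 0;
}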
855 | /* | 855 | /* |
856 | * NAME: txRelease() | 856 | * NAME: txRelease() |
857 | * | 857 | * |
858 | * FUNCTION: Release buffers associated with transaction locks, but don't | 858 | * FUNCTION: Release buffers associated with transaction locks, but don't |
859 | * mark homeok yet. This allows other transactions to modify | 859 | * mark homeok yet. This allows other transactions to modify |
860 | * buffers, but won't let them go to disk until commit record | 860 | * buffers, but won't let them go to disk until commit record |
861 | * actually gets written. | 861 | * actually gets written. |
862 | * | 862 | * |
863 | * PARAMETER: | 863 | * PARAMETER: |
864 | * tblk - | 864 | * tblk - |
865 | * | 865 | * |
866 | * RETURN: Errors from subroutines. | 866 | * RETURN: Errors from subroutines. |
867 | */ | 867 | */ |
868 | static void txRelease(struct tblock * tblk) | 868 | static void txRelease(struct tblock * tblk) |
869 | { | 869 | { |
870 | struct metapage *mp; | 870 | struct metapage *mp; |
871 | lid_t lid; | 871 | lid_t lid; |
872 | struct tlock *tlck; | 872 | struct tlock *tlck; |
873 | 873 | ||
874 | TXN_LOCK(); | 874 | TXN_LOCK(); |
875 | 875 | ||
876 | for (lid = tblk->next; lid; lid = tlck->next) { | 876 | for (lid = tblk->next; lid; lid = tlck->next) { |
877 | tlck = lid_to_tlock(lid); | 877 | tlck = lid_to_tlock(lid); |
878 | if ((mp = tlck->mp) != NULL && | 878 | if ((mp = tlck->mp) != NULL && |
879 | (tlck->type & tlckBTROOT) == 0) { | 879 | (tlck->type & tlckBTROOT) == 0) { |
880 | assert(mp->xflag & COMMIT_PAGE); | 880 | assert(mp->xflag & COMMIT_PAGE); |
881 | mp->lid = 0; | 881 | mp->lid = 0; |
882 | } | 882 | } |
883 | } | 883 | } |
884 | 884 | ||
885 | /* | 885 | /* |
886 | * wakeup transactions waiting on a page locked | 886 | * wakeup transactions waiting on a page locked |
887 | * by the current transaction | 887 | * by the current transaction |
888 | */ | 888 | */ |
889 | TXN_WAKEUP(&tblk->waitor); | 889 | TXN_WAKEUP(&tblk->waitor); |
890 | 890 | ||
891 | TXN_UNLOCK(); | 891 | TXN_UNLOCK(); |
892 | } | 892 | } |
893 | 893 | ||
894 | /* | 894 | /* |
895 | * NAME: txUnlock() | 895 | * NAME: txUnlock() |
896 | * | 896 | * |
897 | * FUNCTION: Initiates pageout of pages modified by tid in journalled | 897 | * FUNCTION: Initiates pageout of pages modified by tid in journalled |
898 | * objects and frees their lockwords. | 898 | * objects and frees their lockwords. |
899 | */ | 899 | */ |
900 | static void txUnlock(struct tblock * tblk) | 900 | static void txUnlock(struct tblock * tblk) |
901 | { | 901 | { |
902 | struct tlock *tlck; | 902 | struct tlock *tlck; |
903 | struct linelock *linelock; | 903 | struct linelock *linelock; |
904 | lid_t lid, next, llid, k; | 904 | lid_t lid, next, llid, k; |
905 | struct metapage *mp; | 905 | struct metapage *mp; |
906 | struct jfs_log *log; | 906 | struct jfs_log *log; |
907 | int difft, diffp; | 907 | int difft, diffp; |
908 | unsigned long flags; | 908 | unsigned long flags; |
909 | 909 | ||
910 | jfs_info("txUnlock: tblk = 0x%p", tblk); | 910 | jfs_info("txUnlock: tblk = 0x%p", tblk); |
911 | log = JFS_SBI(tblk->sb)->log; | 911 | log = JFS_SBI(tblk->sb)->log; |
912 | 912 | ||
913 | /* | 913 | /* |
914 | * mark page under tlock homeok (its log has been written): | 914 | * mark page under tlock homeok (its log has been written): |
915 | */ | 915 | */ |
916 | for (lid = tblk->next; lid; lid = next) { | 916 | for (lid = tblk->next; lid; lid = next) { |
917 | tlck = lid_to_tlock(lid); | 917 | tlck = lid_to_tlock(lid); |
918 | next = tlck->next; | 918 | next = tlck->next; |
919 | 919 | ||
920 | jfs_info("unlocking lid = %d, tlck = 0x%p", lid, tlck); | 920 | jfs_info("unlocking lid = %d, tlck = 0x%p", lid, tlck); |
921 | 921 | ||
922 | /* unbind page from tlock */ | 922 | /* unbind page from tlock */ |
923 | if ((mp = tlck->mp) != NULL && | 923 | if ((mp = tlck->mp) != NULL && |
924 | (tlck->type & tlckBTROOT) == 0) { | 924 | (tlck->type & tlckBTROOT) == 0) { |
925 | assert(mp->xflag & COMMIT_PAGE); | 925 | assert(mp->xflag & COMMIT_PAGE); |
926 | 926 | ||
927 | /* hold buffer | 927 | /* hold buffer |
928 | */ | 928 | */ |
929 | hold_metapage(mp); | 929 | hold_metapage(mp); |
930 | 930 | ||
931 | assert(mp->nohomeok > 0); | 931 | assert(mp->nohomeok > 0); |
932 | _metapage_homeok(mp); | 932 | _metapage_homeok(mp); |
933 | 933 | ||
934 | /* inherit younger/larger clsn */ | 934 | /* inherit younger/larger clsn */ |
935 | LOGSYNC_LOCK(log, flags); | 935 | LOGSYNC_LOCK(log, flags); |
936 | if (mp->clsn) { | 936 | if (mp->clsn) { |
937 | logdiff(difft, tblk->clsn, log); | 937 | logdiff(difft, tblk->clsn, log); |
938 | logdiff(diffp, mp->clsn, log); | 938 | logdiff(diffp, mp->clsn, log); |
939 | if (difft > diffp) | 939 | if (difft > diffp) |
940 | mp->clsn = tblk->clsn; | 940 | mp->clsn = tblk->clsn; |
941 | } else | 941 | } else |
942 | mp->clsn = tblk->clsn; | 942 | mp->clsn = tblk->clsn; |
943 | LOGSYNC_UNLOCK(log, flags); | 943 | LOGSYNC_UNLOCK(log, flags); |
944 | 944 | ||
945 | assert(!(tlck->flag & tlckFREEPAGE)); | 945 | assert(!(tlck->flag & tlckFREEPAGE)); |
946 | 946 | ||
947 | put_metapage(mp); | 947 | put_metapage(mp); |
948 | } | 948 | } |
949 | 949 | ||
950 | /* insert tlock, and linelock(s) of the tlock if any, | 950 | /* insert tlock, and linelock(s) of the tlock if any, |
951 | * at head of freelist | 951 | * at head of freelist |
952 | */ | 952 | */ |
953 | TXN_LOCK(); | 953 | TXN_LOCK(); |
954 | 954 | ||
955 | llid = ((struct linelock *) & tlck->lock)->next; | 955 | llid = ((struct linelock *) & tlck->lock)->next; |
956 | while (llid) { | 956 | while (llid) { |
957 | linelock = (struct linelock *) lid_to_tlock(llid); | 957 | linelock = (struct linelock *) lid_to_tlock(llid); |
958 | k = linelock->next; | 958 | k = linelock->next; |
959 | txLockFree(llid); | 959 | txLockFree(llid); |
960 | llid = k; | 960 | llid = k; |
961 | } | 961 | } |
962 | txLockFree(lid); | 962 | txLockFree(lid); |
963 | 963 | ||
964 | TXN_UNLOCK(); | 964 | TXN_UNLOCK(); |
965 | } | 965 | } |
966 | tblk->next = tblk->last = 0; | 966 | tblk->next = tblk->last = 0; |
967 | 967 | ||
968 | /* | 968 | /* |
969 | * remove tblock from logsynclist | 969 | * remove tblock from logsynclist |
970 | * (allocation map pages inherited lsn of tblk and | 970 | * (allocation map pages inherited lsn of tblk and |
971 | * have been inserted in logsync list at txUpdateMap()) | 971 | * have been inserted in logsync list at txUpdateMap()) |
972 | */ | 972 | */ |
973 | if (tblk->lsn) { | 973 | if (tblk->lsn) { |
974 | LOGSYNC_LOCK(log, flags); | 974 | LOGSYNC_LOCK(log, flags); |
975 | log->count--; | 975 | log->count--; |
976 | list_del(&tblk->synclist); | 976 | list_del(&tblk->synclist); |
977 | LOGSYNC_UNLOCK(log, flags); | 977 | LOGSYNC_UNLOCK(log, flags); |
978 | } | 978 | } |
979 | } | 979 | } |
980 | 980 | ||
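Because the journal is circular, the clsn comparison in txUnlock() goes through logdiff() rather than comparing raw lsn values: only the distance from the current sync point is meaningful. A hedged model of the "inherit younger/larger clsn" step; the logdiff arithmetic below is an assumption based on that description, not the kernel macro verbatim:

#include <stdio.h>

/* Toy model: the log wraps, so compare lsns by distance past syncpt. */
struct log_model { int syncpt; int logsize; };

static int logdiff_model(int lsn, const struct log_model *log)
{
	int diff = lsn - log->syncpt;
	if (diff < 0)
		diff += log->logsize;   /* unwrap across the log's end */
	return diff;
}

/* Keep the younger (farther-from-syncpt) commit lsn on the page. */
static int inherit_clsn(int page_clsn, int tblk_clsn,
			const struct log_model *log)
{
	if (page_clsn == 0)
		return tblk_clsn;       /* page had no clsn yet */
	if (logdiff_model(tblk_clsn, log) > logdiff_model(page_clsn, log))
		return tblk_clsn;       /* transaction's clsn is younger */
	return page_clsn;
}

int main(void)
{
	struct log_model log = { .syncpt = 900, .logsize = 1000 };
	/* lsn 50 wrapped past the end, so it is younger than 950 */
	printf("clsn = %d\n", inherit_clsn(950, 50, &log));
	return 0;
}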
981 | /* | 981 | /* |
982 | * txMaplock() | 982 | * txMaplock() |
983 | * | 983 | * |
984 | * function: allocate a transaction lock for freed page/entry; | 984 | * function: allocate a transaction lock for freed page/entry; |
985 | * for freed page, maplock is used as xtlock/dtlock type; | 985 | * for freed page, maplock is used as xtlock/dtlock type; |
986 | */ | 986 | */ |
987 | struct tlock *txMaplock(tid_t tid, struct inode *ip, int type) | 987 | struct tlock *txMaplock(tid_t tid, struct inode *ip, int type) |
988 | { | 988 | { |
989 | struct jfs_inode_info *jfs_ip = JFS_IP(ip); | 989 | struct jfs_inode_info *jfs_ip = JFS_IP(ip); |
990 | lid_t lid; | 990 | lid_t lid; |
991 | struct tblock *tblk; | 991 | struct tblock *tblk; |
992 | struct tlock *tlck; | 992 | struct tlock *tlck; |
993 | struct maplock *maplock; | 993 | struct maplock *maplock; |
994 | 994 | ||
995 | TXN_LOCK(); | 995 | TXN_LOCK(); |
996 | 996 | ||
997 | /* | 997 | /* |
998 | * allocate a tlock | 998 | * allocate a tlock |
999 | */ | 999 | */ |
1000 | lid = txLockAlloc(); | 1000 | lid = txLockAlloc(); |
1001 | tlck = lid_to_tlock(lid); | 1001 | tlck = lid_to_tlock(lid); |
1002 | 1002 | ||
1003 | /* | 1003 | /* |
1004 | * initialize tlock | 1004 | * initialize tlock |
1005 | */ | 1005 | */ |
1006 | tlck->tid = tid; | 1006 | tlck->tid = tid; |
1007 | 1007 | ||
1008 | /* bind the tlock and the object */ | 1008 | /* bind the tlock and the object */ |
1009 | tlck->flag = tlckINODELOCK; | 1009 | tlck->flag = tlckINODELOCK; |
1010 | tlck->ip = ip; | 1010 | tlck->ip = ip; |
1011 | tlck->mp = NULL; | 1011 | tlck->mp = NULL; |
1012 | 1012 | ||
1013 | tlck->type = type; | 1013 | tlck->type = type; |
1014 | 1014 | ||
1015 | /* | 1015 | /* |
1016 | * enqueue transaction lock to transaction/inode | 1016 | * enqueue transaction lock to transaction/inode |
1017 | */ | 1017 | */ |
1018 | /* insert the tlock at tail of transaction tlock list */ | 1018 | /* insert the tlock at tail of transaction tlock list */ |
1019 | if (tid) { | 1019 | if (tid) { |
1020 | tblk = tid_to_tblock(tid); | 1020 | tblk = tid_to_tblock(tid); |
1021 | if (tblk->next) | 1021 | if (tblk->next) |
1022 | lid_to_tlock(tblk->last)->next = lid; | 1022 | lid_to_tlock(tblk->last)->next = lid; |
1023 | else | 1023 | else |
1024 | tblk->next = lid; | 1024 | tblk->next = lid; |
1025 | tlck->next = 0; | 1025 | tlck->next = 0; |
1026 | tblk->last = lid; | 1026 | tblk->last = lid; |
1027 | } | 1027 | } |
1028 | /* anonymous transaction: | 1028 | /* anonymous transaction: |
1029 | * insert the tlock at head of inode anonymous tlock list | 1029 | * insert the tlock at head of inode anonymous tlock list |
1030 | */ | 1030 | */ |
1031 | else { | 1031 | else { |
1032 | tlck->next = jfs_ip->atlhead; | 1032 | tlck->next = jfs_ip->atlhead; |
1033 | jfs_ip->atlhead = lid; | 1033 | jfs_ip->atlhead = lid; |
1034 | if (tlck->next == 0) { | 1034 | if (tlck->next == 0) { |
1035 | /* This inode's first anonymous transaction */ | 1035 | /* This inode's first anonymous transaction */ |
1036 | jfs_ip->atltail = lid; | 1036 | jfs_ip->atltail = lid; |
1037 | list_add_tail(&jfs_ip->anon_inode_list, | 1037 | list_add_tail(&jfs_ip->anon_inode_list, |
1038 | &TxAnchor.anon_list); | 1038 | &TxAnchor.anon_list); |
1039 | } | 1039 | } |
1040 | } | 1040 | } |
1041 | 1041 | ||
1042 | TXN_UNLOCK(); | 1042 | TXN_UNLOCK(); |
1043 | 1043 | ||
1044 | /* initialize type dependent area for maplock */ | 1044 | /* initialize type dependent area for maplock */ |
1045 | maplock = (struct maplock *) & tlck->lock; | 1045 | maplock = (struct maplock *) & tlck->lock; |
1046 | maplock->next = 0; | 1046 | maplock->next = 0; |
1047 | maplock->maxcnt = 0; | 1047 | maplock->maxcnt = 0; |
1048 | maplock->index = 0; | 1048 | maplock->index = 0; |
1049 | 1049 | ||
1050 | return tlck; | 1050 | return tlck; |
1051 | } | 1051 | } |
1052 | 1052 | ||
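txMaplock() repeats the dual enqueue also seen in txLock(): a tlock owned by a real transaction is appended at the tail of the tblock's list (ordering matters during truncate), while an anonymous tlock (tid == 0) is pushed on the head of the inode's anonymous list. A simplified, illustrative model of both paths:

#include <stdio.h>

/* Hypothetical model of the two enqueue paths; sizes and names are
 * illustrative, and lid/tid 0 again means "end of list". */
#define NR 16

struct tlock_model { unsigned short next; };
static struct tlock_model tlocks[NR];

struct tblock_model { unsigned short next, last; };

static void enqueue_tid(struct tblock_model *tblk, unsigned short lid)
{
	if (tblk->next)
		tlocks[tblk->last].next = lid;  /* append after old tail */
	else
		tblk->next = lid;               /* first tlock of the tx */
	tlocks[lid].next = 0;
	tblk->last = lid;
}

static void enqueue_anon(unsigned short *atlhead, unsigned short *atltail,
			 unsigned short lid)
{
	tlocks[lid].next = *atlhead;            /* push on the head */
	*atlhead = lid;
	if (tlocks[lid].next == 0)
		*atltail = lid;                 /* list was empty */
}

int main(void)
{
	struct tblock_model tblk = { 0, 0 };
	unsigned short ahead = 0, atail = 0;

	enqueue_tid(&tblk, 1);
	enqueue_tid(&tblk, 2);                  /* tx list: 1 -> 2 */
	enqueue_anon(&ahead, &atail, 3);
	enqueue_anon(&ahead, &atail, 4);        /* anon list: 4 -> 3 */
	printf("tx head=%u tail=%u anon head=%u tail=%u\n",
	       tblk.next, tblk.last, ahead, atail);
	return 0;
}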
1053 | /* | 1053 | /* |
1054 | * txLinelock() | 1054 | * txLinelock() |
1055 | * | 1055 | * |
1056 | * function: allocate a transaction lock for log vector list | 1056 | * function: allocate a transaction lock for log vector list |
1057 | */ | 1057 | */ |
1058 | struct linelock *txLinelock(struct linelock * tlock) | 1058 | struct linelock *txLinelock(struct linelock * tlock) |
1059 | { | 1059 | { |
1060 | lid_t lid; | 1060 | lid_t lid; |
1061 | struct tlock *tlck; | 1061 | struct tlock *tlck; |
1062 | struct linelock *linelock; | 1062 | struct linelock *linelock; |
1063 | 1063 | ||
1064 | TXN_LOCK(); | 1064 | TXN_LOCK(); |
1065 | 1065 | ||
1066 | /* allocate a TxLock structure */ | 1066 | /* allocate a TxLock structure */ |
1067 | lid = txLockAlloc(); | 1067 | lid = txLockAlloc(); |
1068 | tlck = lid_to_tlock(lid); | 1068 | tlck = lid_to_tlock(lid); |
1069 | 1069 | ||
1070 | TXN_UNLOCK(); | 1070 | TXN_UNLOCK(); |
1071 | 1071 | ||
1072 | /* initialize linelock */ | 1072 | /* initialize linelock */ |
1073 | linelock = (struct linelock *) tlck; | 1073 | linelock = (struct linelock *) tlck; |
1074 | linelock->next = 0; | 1074 | linelock->next = 0; |
1075 | linelock->flag = tlckLINELOCK; | 1075 | linelock->flag = tlckLINELOCK; |
1076 | linelock->maxcnt = TLOCKLONG; | 1076 | linelock->maxcnt = TLOCKLONG; |
1077 | linelock->index = 0; | 1077 | linelock->index = 0; |
1078 | 1078 | ||
1079 | /* append linelock after tlock */ | 1079 | /* append linelock after tlock */ |
1080 | linelock->next = tlock->next; | 1080 | linelock->next = tlock->next; |
1081 | tlock->next = lid; | 1081 | tlock->next = lid; |
1082 | 1082 | ||
1083 | return linelock; | 1083 | return linelock; |
1084 | } | 1084 | } |
1085 | 1085 | ||
1086 | /* | 1086 | /* |
1087 | * transaction commit management | 1087 | * transaction commit management |
1088 | * ----------------------------- | 1088 | * ----------------------------- |
1089 | */ | 1089 | */ |
1090 | 1090 | ||
1091 | /* | 1091 | /* |
1092 | * NAME: txCommit() | 1092 | * NAME: txCommit() |
1093 | * | 1093 | * |
1094 | * FUNCTION: commit the changes to the objects specified in | 1094 | * FUNCTION: commit the changes to the objects specified in |
1095 | * clist. For journalled segments only the | 1095 | * clist. For journalled segments only the |
1096 | * changes of the caller are committed, i.e., by tid. | 1096 | * changes of the caller are committed, i.e., by tid. |
1097 | * for non-journalled segments the data are flushed to | 1097 | * for non-journalled segments the data are flushed to |
1098 | * disk and then the change to the disk inode and indirect | 1098 | * disk and then the change to the disk inode and indirect |
1099 | * blocks committed (so blocks newly allocated to the | 1099 | * blocks committed (so blocks newly allocated to the |
1100 | * segment will be made a part of the segment atomically). | 1100 | * segment will be made a part of the segment atomically). |
1101 | * | 1101 | * |
1102 | * all of the segments specified in clist must be in | 1102 | * all of the segments specified in clist must be in |
1103 | * one file system. no more than 6 segments are needed | 1103 | * one file system. no more than 6 segments are needed |
1104 | * to handle all unix svcs. | 1104 | * to handle all unix svcs. |
1105 | * | 1105 | * |
1106 | * if the i_nlink field (i.e. disk inode link count) | 1106 | * if the i_nlink field (i.e. disk inode link count) |
1107 | * is zero, and the type of inode is a regular file or | 1107 | * is zero, and the type of inode is a regular file or |
1108 | * directory, or symbolic link, the inode is truncated | 1108 | * directory, or symbolic link, the inode is truncated |
1109 | * to zero length. the truncation is committed but the | 1109 | * to zero length. the truncation is committed but the |
1110 | * VM resources are unaffected until it is closed (see | 1110 | * VM resources are unaffected until it is closed (see |
1111 | * iput and iclose). | 1111 | * iput and iclose). |
1112 | * | 1112 | * |
1113 | * PARAMETER: | 1113 | * PARAMETER: |
1114 | * | 1114 | * |
1115 | * RETURN: | 1115 | * RETURN: |
1116 | * | 1116 | * |
1117 | * serialization: | 1117 | * serialization: |
1118 | * on entry the inode lock on each segment is assumed | 1118 | * on entry the inode lock on each segment is assumed |
1119 | * to be held. | 1119 | * to be held. |
1120 | * | 1120 | * |
1121 | * i/o error: | 1121 | * i/o error: |
1122 | */ | 1122 | */ |
1123 | int txCommit(tid_t tid, /* transaction identifier */ | 1123 | int txCommit(tid_t tid, /* transaction identifier */ |
1124 | int nip, /* number of inodes to commit */ | 1124 | int nip, /* number of inodes to commit */ |
1125 | struct inode **iplist, /* list of inode to commit */ | 1125 | struct inode **iplist, /* list of inode to commit */ |
1126 | int flag) | 1126 | int flag) |
1127 | { | 1127 | { |
1128 | int rc = 0; | 1128 | int rc = 0; |
1129 | struct commit cd; | 1129 | struct commit cd; |
1130 | struct jfs_log *log; | 1130 | struct jfs_log *log; |
1131 | struct tblock *tblk; | 1131 | struct tblock *tblk; |
1132 | struct lrd *lrd; | 1132 | struct lrd *lrd; |
1133 | int lsn; | 1133 | int lsn; |
1134 | struct inode *ip; | 1134 | struct inode *ip; |
1135 | struct jfs_inode_info *jfs_ip; | 1135 | struct jfs_inode_info *jfs_ip; |
1136 | int k, n; | 1136 | int k, n; |
1137 | ino_t top; | 1137 | ino_t top; |
1138 | struct super_block *sb; | 1138 | struct super_block *sb; |
1139 | 1139 | ||
1140 | jfs_info("txCommit, tid = %d, flag = %d", tid, flag); | 1140 | jfs_info("txCommit, tid = %d, flag = %d", tid, flag); |
1141 | /* is read-only file system ? */ | 1141 | /* is read-only file system ? */ |
1142 | if (isReadOnly(iplist[0])) { | 1142 | if (isReadOnly(iplist[0])) { |
1143 | rc = -EROFS; | 1143 | rc = -EROFS; |
1144 | goto TheEnd; | 1144 | goto TheEnd; |
1145 | } | 1145 | } |
1146 | 1146 | ||
1147 | sb = cd.sb = iplist[0]->i_sb; | 1147 | sb = cd.sb = iplist[0]->i_sb; |
1148 | cd.tid = tid; | 1148 | cd.tid = tid; |
1149 | 1149 | ||
1150 | if (tid == 0) | 1150 | if (tid == 0) |
1151 | tid = txBegin(sb, 0); | 1151 | tid = txBegin(sb, 0); |
1152 | tblk = tid_to_tblock(tid); | 1152 | tblk = tid_to_tblock(tid); |
1153 | 1153 | ||
1154 | /* | 1154 | /* |
1155 | * initialize commit structure | 1155 | * initialize commit structure |
1156 | */ | 1156 | */ |
1157 | log = JFS_SBI(sb)->log; | 1157 | log = JFS_SBI(sb)->log; |
1158 | cd.log = log; | 1158 | cd.log = log; |
1159 | 1159 | ||
1160 | /* initialize log record descriptor in commit */ | 1160 | /* initialize log record descriptor in commit */ |
1161 | lrd = &cd.lrd; | 1161 | lrd = &cd.lrd; |
1162 | lrd->logtid = cpu_to_le32(tblk->logtid); | 1162 | lrd->logtid = cpu_to_le32(tblk->logtid); |
1163 | lrd->backchain = 0; | 1163 | lrd->backchain = 0; |
1164 | 1164 | ||
1165 | tblk->xflag |= flag; | 1165 | tblk->xflag |= flag; |
1166 | 1166 | ||
1167 | if ((flag & (COMMIT_FORCE | COMMIT_SYNC)) == 0) | 1167 | if ((flag & (COMMIT_FORCE | COMMIT_SYNC)) == 0) |
1168 | tblk->xflag |= COMMIT_LAZY; | 1168 | tblk->xflag |= COMMIT_LAZY; |
1169 | /* | 1169 | /* |
1170 | * prepare non-journaled objects for commit | 1170 | * prepare non-journaled objects for commit |
1171 | * | 1171 | * |
1172 | * flush data pages of non-journaled file | 1172 | * flush data pages of non-journaled file |
1173 | * to prevent the file getting non-initialized disk blocks | 1173 | * to prevent the file getting non-initialized disk blocks |
1174 | * in case of crash. | 1174 | * in case of crash. |
1175 | * (new blocks - ) | 1175 | * (new blocks - ) |
1176 | */ | 1176 | */ |
1177 | cd.iplist = iplist; | 1177 | cd.iplist = iplist; |
1178 | cd.nip = nip; | 1178 | cd.nip = nip; |
1179 | 1179 | ||
1180 | /* | 1180 | /* |
1181 | * acquire transaction lock on (on-disk) inodes | 1181 | * acquire transaction lock on (on-disk) inodes |
1182 | * | 1182 | * |
1183 | * update on-disk inode from in-memory inode | 1183 | * update on-disk inode from in-memory inode |
1184 | * acquiring transaction locks for AFTER records | 1184 | * acquiring transaction locks for AFTER records |
1185 | * on the on-disk inode of file object | 1185 | * on the on-disk inode of file object |
1186 | * | 1186 | * |
1187 | * sort the inodes array by inode number in descending order | 1187 | * sort the inodes array by inode number in descending order |
1188 | * to prevent deadlock when acquiring transaction lock | 1188 | * to prevent deadlock when acquiring transaction lock |
1189 | * of on-disk inodes on multiple on-disk inode pages by | 1189 | * of on-disk inodes on multiple on-disk inode pages by |
1190 | * multiple concurrent transactions | 1190 | * multiple concurrent transactions |
1191 | */ | 1191 | */ |
1192 | for (k = 0; k < cd.nip; k++) { | 1192 | for (k = 0; k < cd.nip; k++) { |
1193 | top = (cd.iplist[k])->i_ino; | 1193 | top = (cd.iplist[k])->i_ino; |
1194 | for (n = k + 1; n < cd.nip; n++) { | 1194 | for (n = k + 1; n < cd.nip; n++) { |
1195 | ip = cd.iplist[n]; | 1195 | ip = cd.iplist[n]; |
1196 | if (ip->i_ino > top) { | 1196 | if (ip->i_ino > top) { |
1197 | top = ip->i_ino; | 1197 | top = ip->i_ino; |
1198 | cd.iplist[n] = cd.iplist[k]; | 1198 | cd.iplist[n] = cd.iplist[k]; |
1199 | cd.iplist[k] = ip; | 1199 | cd.iplist[k] = ip; |
1200 | } | 1200 | } |
1201 | } | 1201 | } |
1202 | 1202 | ||
1203 | ip = cd.iplist[k]; | 1203 | ip = cd.iplist[k]; |
1204 | jfs_ip = JFS_IP(ip); | 1204 | jfs_ip = JFS_IP(ip); |
1205 | 1205 | ||
1206 | /* | 1206 | /* |
1207 | * BUGBUG - This code has temporarily been removed. The | 1207 | * BUGBUG - This code has temporarily been removed. The |
1208 | * intent is to ensure that any file data is written before | 1208 | * intent is to ensure that any file data is written before |
1209 | * the metadata is committed to the journal. This prevents | 1209 | * the metadata is committed to the journal. This prevents |
1210 | * uninitialized data from appearing in a file after the | 1210 | * uninitialized data from appearing in a file after the |
1211 | * journal has been replayed. (The uninitialized data | 1211 | * journal has been replayed. (The uninitialized data |
1212 | * could be sensitive data removed by another user.) | 1212 | * could be sensitive data removed by another user.) |
1213 | * | 1213 | * |
1214 | * The problem now is that we are holding the IWRITELOCK | 1214 | * The problem now is that we are holding the IWRITELOCK |
1215 | * on the inode, and calling filemap_fdatawrite on an | 1215 | * on the inode, and calling filemap_fdatawrite on an |
1216 | * unmapped page will cause a deadlock in jfs_get_block. | 1216 | * unmapped page will cause a deadlock in jfs_get_block. |
1217 | * | 1217 | * |
1218 | * The long term solution is to pare down the use of | 1218 | * The long term solution is to pare down the use of |
1219 | * IWRITELOCK. We are currently holding it too long. | 1219 | * IWRITELOCK. We are currently holding it too long. |
1220 | * We could also be smarter about which data pages need | 1220 | * We could also be smarter about which data pages need |
1221 | * to be written before the transaction is committed and | 1221 | * to be written before the transaction is committed and |
1222 | * when we don't need to worry about it at all. | 1222 | * when we don't need to worry about it at all. |
1223 | * | 1223 | * |
1224 | * if ((!S_ISDIR(ip->i_mode)) | 1224 | * if ((!S_ISDIR(ip->i_mode)) |
1225 | * && (tblk->flag & COMMIT_DELETE) == 0) { | 1225 | * && (tblk->flag & COMMIT_DELETE) == 0) { |
1226 | * filemap_fdatawrite(ip->i_mapping); | 1226 | * filemap_fdatawrite(ip->i_mapping); |
1227 | * filemap_fdatawait(ip->i_mapping); | 1227 | * filemap_fdatawait(ip->i_mapping); |
1228 | * } | 1228 | * } |
1229 | */ | 1229 | */ |
1230 | 1230 | ||
1231 | /* | 1231 | /* |
1232 | * Mark inode as not dirty. It will still be on the dirty | 1232 | * Mark inode as not dirty. It will still be on the dirty |
1233 | * inode list, but we'll know not to commit it again unless | 1233 | * inode list, but we'll know not to commit it again unless |
1234 | * it gets marked dirty again | 1234 | * it gets marked dirty again |
1235 | */ | 1235 | */ |
1236 | clear_cflag(COMMIT_Dirty, ip); | 1236 | clear_cflag(COMMIT_Dirty, ip); |
1237 | 1237 | ||
1238 | /* inherit anonymous tlock(s) of inode */ | 1238 | /* inherit anonymous tlock(s) of inode */ |
1239 | if (jfs_ip->atlhead) { | 1239 | if (jfs_ip->atlhead) { |
1240 | lid_to_tlock(jfs_ip->atltail)->next = tblk->next; | 1240 | lid_to_tlock(jfs_ip->atltail)->next = tblk->next; |
1241 | tblk->next = jfs_ip->atlhead; | 1241 | tblk->next = jfs_ip->atlhead; |
1242 | if (!tblk->last) | 1242 | if (!tblk->last) |
1243 | tblk->last = jfs_ip->atltail; | 1243 | tblk->last = jfs_ip->atltail; |
1244 | jfs_ip->atlhead = jfs_ip->atltail = 0; | 1244 | jfs_ip->atlhead = jfs_ip->atltail = 0; |
1245 | TXN_LOCK(); | 1245 | TXN_LOCK(); |
1246 | list_del_init(&jfs_ip->anon_inode_list); | 1246 | list_del_init(&jfs_ip->anon_inode_list); |
1247 | TXN_UNLOCK(); | 1247 | TXN_UNLOCK(); |
1248 | } | 1248 | } |
1249 | 1249 | ||
1250 | /* | 1250 | /* |
1251 | * acquire transaction lock on on-disk inode page | 1251 | * acquire transaction lock on on-disk inode page |
1252 | * (become first tlock of the tblk's tlock list) | 1252 | * (become first tlock of the tblk's tlock list) |
1253 | */ | 1253 | */ |
1254 | if (((rc = diWrite(tid, ip)))) | 1254 | if (((rc = diWrite(tid, ip)))) |
1255 | goto out; | 1255 | goto out; |
1256 | } | 1256 | } |
1257 | 1257 | ||
1258 | /* | 1258 | /* |
1259 | * write log records from transaction locks | 1259 | * write log records from transaction locks |
1260 | * | 1260 | * |
1261 | * txUpdateMap() resets XAD_NEW in XAD. | 1261 | * txUpdateMap() resets XAD_NEW in XAD. |
1262 | */ | 1262 | */ |
1263 | if ((rc = txLog(log, tblk, &cd))) | 1263 | if ((rc = txLog(log, tblk, &cd))) |
1264 | goto TheEnd; | 1264 | goto TheEnd; |
1265 | 1265 | ||
1266 | /* | 1266 | /* |
1267 | * Ensure that inode isn't reused before | 1267 | * Ensure that inode isn't reused before |
1268 | * lazy commit thread finishes processing | 1268 | * lazy commit thread finishes processing |
1269 | */ | 1269 | */ |
1270 | if (tblk->xflag & COMMIT_DELETE) { | 1270 | if (tblk->xflag & COMMIT_DELETE) { |
1271 | atomic_inc(&tblk->u.ip->i_count); | 1271 | atomic_inc(&tblk->u.ip->i_count); |
1272 | /* | 1272 | /* |
1273 | * Avoid a rare deadlock | 1273 | * Avoid a rare deadlock |
1274 | * | 1274 | * |
1275 | * If the inode is locked, we may be blocked in | 1275 | * If the inode is locked, we may be blocked in |
1276 | * jfs_commit_inode. If so, we don't want the | 1276 | * jfs_commit_inode. If so, we don't want the |
1277 | * lazy_commit thread doing the last iput() on the inode | 1277 | * lazy_commit thread doing the last iput() on the inode |
1278 | * since that may block on the locked inode. Instead, | 1278 | * since that may block on the locked inode. Instead, |
1279 | * commit the transaction synchronously, so the last iput | 1279 | * commit the transaction synchronously, so the last iput |
1280 | * will be done by the calling thread (or later) | 1280 | * will be done by the calling thread (or later) |
1281 | */ | 1281 | */ |
1282 | if (tblk->u.ip->i_state & I_LOCK) | 1282 | if (tblk->u.ip->i_state & I_LOCK) |
1283 | tblk->xflag &= ~COMMIT_LAZY; | 1283 | tblk->xflag &= ~COMMIT_LAZY; |
1284 | } | 1284 | } |
1285 | 1285 | ||
1286 | ASSERT((!(tblk->xflag & COMMIT_DELETE)) || | 1286 | ASSERT((!(tblk->xflag & COMMIT_DELETE)) || |
1287 | ((tblk->u.ip->i_nlink == 0) && | 1287 | ((tblk->u.ip->i_nlink == 0) && |
1288 | !test_cflag(COMMIT_Nolink, tblk->u.ip))); | 1288 | !test_cflag(COMMIT_Nolink, tblk->u.ip))); |
1289 | 1289 | ||
1290 | /* | 1290 | /* |
1291 | * write COMMIT log record | 1291 | * write COMMIT log record |
1292 | */ | 1292 | */ |
1293 | lrd->type = cpu_to_le16(LOG_COMMIT); | 1293 | lrd->type = cpu_to_le16(LOG_COMMIT); |
1294 | lrd->length = 0; | 1294 | lrd->length = 0; |
1295 | lsn = lmLog(log, tblk, lrd, NULL); | 1295 | lsn = lmLog(log, tblk, lrd, NULL); |
1296 | 1296 | ||
1297 | lmGroupCommit(log, tblk); | 1297 | lmGroupCommit(log, tblk); |
1298 | 1298 | ||
1299 | /* | 1299 | /* |
1300 | * - transaction is now committed - | 1300 | * - transaction is now committed - |
1301 | */ | 1301 | */ |
1302 | 1302 | ||
1303 | /* | 1303 | /* |
1304 | * force pages in careful update | 1304 | * force pages in careful update |
1305 | * (imap addressing structure update) | 1305 | * (imap addressing structure update) |
1306 | */ | 1306 | */ |
1307 | if (flag & COMMIT_FORCE) | 1307 | if (flag & COMMIT_FORCE) |
1308 | txForce(tblk); | 1308 | txForce(tblk); |
1309 | 1309 | ||
1310 | /* | 1310 | /* |
1311 | * update allocation map. | 1311 | * update allocation map. |
1312 | * | 1312 | * |
1313 | * update inode allocation map and inode: | 1313 | * update inode allocation map and inode: |
1314 | * free pager lock on memory object of inode if any. | 1314 | * free pager lock on memory object of inode if any. |
1315 | * update block allocation map. | 1315 | * update block allocation map. |
1316 | * | 1316 | * |
1317 | * txUpdateMap() resets XAD_NEW in XAD. | 1317 | * txUpdateMap() resets XAD_NEW in XAD. |
1318 | */ | 1318 | */ |
1319 | if (tblk->xflag & COMMIT_FORCE) | 1319 | if (tblk->xflag & COMMIT_FORCE) |
1320 | txUpdateMap(tblk); | 1320 | txUpdateMap(tblk); |
1321 | 1321 | ||
1322 | /* | 1322 | /* |
1323 | * free transaction locks and pageout/free pages | 1323 | * free transaction locks and pageout/free pages |
1324 | */ | 1324 | */ |
1325 | txRelease(tblk); | 1325 | txRelease(tblk); |
1326 | 1326 | ||
1327 | if ((tblk->flag & tblkGC_LAZY) == 0) | 1327 | if ((tblk->flag & tblkGC_LAZY) == 0) |
1328 | txUnlock(tblk); | 1328 | txUnlock(tblk); |
1329 | 1329 | ||
1330 | 1330 | ||
1331 | /* | 1331 | /* |
1332 | * reset in-memory object state | 1332 | * reset in-memory object state |
1333 | */ | 1333 | */ |
1334 | for (k = 0; k < cd.nip; k++) { | 1334 | for (k = 0; k < cd.nip; k++) { |
1335 | ip = cd.iplist[k]; | 1335 | ip = cd.iplist[k]; |
1336 | jfs_ip = JFS_IP(ip); | 1336 | jfs_ip = JFS_IP(ip); |
1337 | 1337 | ||
1338 | /* | 1338 | /* |
1339 | * reset in-memory inode state | 1339 | * reset in-memory inode state |
1340 | */ | 1340 | */ |
1341 | jfs_ip->bxflag = 0; | 1341 | jfs_ip->bxflag = 0; |
1342 | jfs_ip->blid = 0; | 1342 | jfs_ip->blid = 0; |
1343 | } | 1343 | } |
1344 | 1344 | ||
1345 | out: | 1345 | out: |
1346 | if (rc != 0) | 1346 | if (rc != 0) |
1347 | txAbort(tid, 1); | 1347 | txAbort(tid, 1); |
1348 | 1348 | ||
1349 | TheEnd: | 1349 | TheEnd: |
1350 | jfs_info("txCommit: tid = %d, returning %d", tid, rc); | 1350 | jfs_info("txCommit: tid = %d, returning %d", tid, rc); |
1351 | return rc; | 1351 | return rc; |
1352 | } | 1352 | } |
1353 | 1353 | ||
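The descending sort near the top of txCommit() is what enforces a single global lock order on on-disk inode pages across concurrent transactions. Extracted as a stand-alone sketch, using a hypothetical struct inode_model in place of the kernel inode:

#include <stdio.h>

/* Deadlock avoidance by ordering: put the commit's inode list in
 * descending i_ino order, mirroring the selection-style pass in
 * txCommit(). 'struct inode_model' is an illustrative stand-in. */
struct inode_model { unsigned long i_ino; };

static void sort_desc(struct inode_model **iplist, int nip)
{
	for (int k = 0; k < nip; k++) {
		unsigned long top = iplist[k]->i_ino;
		for (int n = k + 1; n < nip; n++) {
			if (iplist[n]->i_ino > top) {
				struct inode_model *tmp = iplist[n];
				top = tmp->i_ino;
				iplist[n] = iplist[k]; /* swap toward front */
				iplist[k] = tmp;
			}
		}
	}
}

int main(void)
{
	struct inode_model a = {3}, b = {17}, c = {5};
	struct inode_model *list[] = { &a, &b, &c };
	sort_desc(list, 3);
	for (int i = 0; i < 3; i++)
		printf("%lu ", list[i]->i_ino);   /* prints: 17 5 3 */
	printf("\n");
	return 0;
}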
1354 | /* | 1354 | /* |
1355 | * NAME: txLog() | 1355 | * NAME: txLog() |
1356 | * | 1356 | * |
1357 | * FUNCTION: Writes AFTER log records for all lines modified | 1357 | * FUNCTION: Writes AFTER log records for all lines modified |
1358 | * by tid for segments specified by inodes in comdata. | 1358 | * by tid for segments specified by inodes in comdata. |
1359 | * Code assumes only WRITELOCKS are recorded in lockwords. | 1359 | * Code assumes only WRITELOCKS are recorded in lockwords. |
1360 | * | 1360 | * |
1361 | * PARAMETERS: | 1361 | * PARAMETERS: |
1362 | * | 1362 | * |
1363 | * RETURN : | 1363 | * RETURN : |
1364 | */ | 1364 | */ |
1365 | static int txLog(struct jfs_log * log, struct tblock * tblk, struct commit * cd) | 1365 | static int txLog(struct jfs_log * log, struct tblock * tblk, struct commit * cd) |
1366 | { | 1366 | { |
1367 | int rc = 0; | 1367 | int rc = 0; |
1368 | struct inode *ip; | 1368 | struct inode *ip; |
1369 | lid_t lid; | 1369 | lid_t lid; |
1370 | struct tlock *tlck; | 1370 | struct tlock *tlck; |
1371 | struct lrd *lrd = &cd->lrd; | 1371 | struct lrd *lrd = &cd->lrd; |
1372 | 1372 | ||
1373 | /* | 1373 | /* |
1374 | * write log record(s) for each tlock of transaction, | 1374 | * write log record(s) for each tlock of transaction, |
1375 | */ | 1375 | */ |
1376 | for (lid = tblk->next; lid; lid = tlck->next) { | 1376 | for (lid = tblk->next; lid; lid = tlck->next) { |
1377 | tlck = lid_to_tlock(lid); | 1377 | tlck = lid_to_tlock(lid); |
1378 | 1378 | ||
1379 | tlck->flag |= tlckLOG; | 1379 | tlck->flag |= tlckLOG; |
1380 | 1380 | ||
1381 | /* initialize lrd common */ | 1381 | /* initialize lrd common */ |
1382 | ip = tlck->ip; | 1382 | ip = tlck->ip; |
1383 | lrd->aggregate = cpu_to_le32(JFS_SBI(ip->i_sb)->aggregate); | 1383 | lrd->aggregate = cpu_to_le32(JFS_SBI(ip->i_sb)->aggregate); |
1384 | lrd->log.redopage.fileset = cpu_to_le32(JFS_IP(ip)->fileset); | 1384 | lrd->log.redopage.fileset = cpu_to_le32(JFS_IP(ip)->fileset); |
1385 | lrd->log.redopage.inode = cpu_to_le32(ip->i_ino); | 1385 | lrd->log.redopage.inode = cpu_to_le32(ip->i_ino); |
1386 | 1386 | ||
1387 | /* write log record of page from the tlock */ | 1387 | /* write log record of page from the tlock */ |
1388 | switch (tlck->type & tlckTYPE) { | 1388 | switch (tlck->type & tlckTYPE) { |
1389 | case tlckXTREE: | 1389 | case tlckXTREE: |
1390 | xtLog(log, tblk, lrd, tlck); | 1390 | xtLog(log, tblk, lrd, tlck); |
1391 | break; | 1391 | break; |
1392 | 1392 | ||
1393 | case tlckDTREE: | 1393 | case tlckDTREE: |
1394 | dtLog(log, tblk, lrd, tlck); | 1394 | dtLog(log, tblk, lrd, tlck); |
1395 | break; | 1395 | break; |
1396 | 1396 | ||
1397 | case tlckINODE: | 1397 | case tlckINODE: |
1398 | diLog(log, tblk, lrd, tlck, cd); | 1398 | diLog(log, tblk, lrd, tlck, cd); |
1399 | break; | 1399 | break; |
1400 | 1400 | ||
1401 | case tlckMAP: | 1401 | case tlckMAP: |
1402 | mapLog(log, tblk, lrd, tlck); | 1402 | mapLog(log, tblk, lrd, tlck); |
1403 | break; | 1403 | break; |
1404 | 1404 | ||
1405 | case tlckDATA: | 1405 | case tlckDATA: |
1406 | dataLog(log, tblk, lrd, tlck); | 1406 | dataLog(log, tblk, lrd, tlck); |
1407 | break; | 1407 | break; |
1408 | 1408 | ||
1409 | default: | 1409 | default: |
1410 | jfs_err("UFO tlock:0x%p", tlck); | 1410 | jfs_err("UFO tlock:0x%p", tlck); |
1411 | } | 1411 | } |
1412 | } | 1412 | } |
1413 | 1413 | ||
1414 | return rc; | 1414 | return rc; |
1415 | } | 1415 | } |
1416 | 1416 | ||
1417 | /* | 1417 | /* |
1418 | * diLog() | 1418 | * diLog() |
1419 | * | 1419 | * |
1420 | * function: log inode tlock and format maplock to update bmap; | 1420 | * function: log inode tlock and format maplock to update bmap; |
1421 | */ | 1421 | */ |
1422 | static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | 1422 | static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, |
1423 | struct tlock * tlck, struct commit * cd) | 1423 | struct tlock * tlck, struct commit * cd) |
1424 | { | 1424 | { |
1425 | int rc = 0; | 1425 | int rc = 0; |
1426 | struct metapage *mp; | 1426 | struct metapage *mp; |
1427 | pxd_t *pxd; | 1427 | pxd_t *pxd; |
1428 | struct pxd_lock *pxdlock; | 1428 | struct pxd_lock *pxdlock; |
1429 | 1429 | ||
1430 | mp = tlck->mp; | 1430 | mp = tlck->mp; |
1431 | 1431 | ||
1432 | /* initialize as REDOPAGE record format */ | 1432 | /* initialize as REDOPAGE record format */ |
1433 | lrd->log.redopage.type = cpu_to_le16(LOG_INODE); | 1433 | lrd->log.redopage.type = cpu_to_le16(LOG_INODE); |
1434 | lrd->log.redopage.l2linesize = cpu_to_le16(L2INODESLOTSIZE); | 1434 | lrd->log.redopage.l2linesize = cpu_to_le16(L2INODESLOTSIZE); |
1435 | 1435 | ||
1436 | pxd = &lrd->log.redopage.pxd; | 1436 | pxd = &lrd->log.redopage.pxd; |
1437 | 1437 | ||
1438 | /* | 1438 | /* |
1439 | * inode after image | 1439 | * inode after image |
1440 | */ | 1440 | */ |
1441 | if (tlck->type & tlckENTRY) { | 1441 | if (tlck->type & tlckENTRY) { |
1442 | /* log after-image for logredo(): */ | 1442 | /* log after-image for logredo(): */ |
1443 | lrd->type = cpu_to_le16(LOG_REDOPAGE); | 1443 | lrd->type = cpu_to_le16(LOG_REDOPAGE); |
1444 | PXDaddress(pxd, mp->index); | 1444 | PXDaddress(pxd, mp->index); |
1445 | PXDlength(pxd, | 1445 | PXDlength(pxd, |
1446 | mp->logical_size >> tblk->sb->s_blocksize_bits); | 1446 | mp->logical_size >> tblk->sb->s_blocksize_bits); |
1447 | lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); | 1447 | lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); |
1448 | 1448 | ||
1449 | /* mark page as homeward bound */ | 1449 | /* mark page as homeward bound */ |
1450 | tlck->flag |= tlckWRITEPAGE; | 1450 | tlck->flag |= tlckWRITEPAGE; |
1451 | } else if (tlck->type & tlckFREE) { | 1451 | } else if (tlck->type & tlckFREE) { |
1452 | /* | 1452 | /* |
1453 | * free inode extent | 1453 | * free inode extent |
1454 | * | 1454 | * |
1455 | * (pages of the freed inode extent have been invalidated and | 1455 | * (pages of the freed inode extent have been invalidated and |
1456 | * a maplock for free of the extent has been formatted at | 1456 | * a maplock for free of the extent has been formatted at |
1457 | * txLock() time); | 1457 | * txLock() time); |
1458 | * | 1458 | * |
1459 | * the tlock had been acquired on the inode allocation map page | 1459 | * the tlock had been acquired on the inode allocation map page |
1460 | * (iag) that specifies the freed extent, even though the map | 1460 | * (iag) that specifies the freed extent, even though the map |
1461 | * page is not itself logged, to prevent pageout of the map | 1461 | * page is not itself logged, to prevent pageout of the map |
1462 | * page before the log; | 1462 | * page before the log; |
1463 | */ | 1463 | */ |
1464 | 1464 | ||
1465 | /* log LOG_NOREDOINOEXT of the freed inode extent for | 1465 | /* log LOG_NOREDOINOEXT of the freed inode extent for |
1466 | * logredo() to start NoRedoPage filters, and to update | 1466 | * logredo() to start NoRedoPage filters, and to update |
1467 | * imap and bmap for free of the extent; | 1467 | * imap and bmap for free of the extent; |
1468 | */ | 1468 | */ |
1469 | lrd->type = cpu_to_le16(LOG_NOREDOINOEXT); | 1469 | lrd->type = cpu_to_le16(LOG_NOREDOINOEXT); |
1470 | /* | 1470 | /* |
1471 | * For the LOG_NOREDOINOEXT record, we need | 1471 | * For the LOG_NOREDOINOEXT record, we need |
1472 | * to pass the IAG number and inode extent | 1472 | * to pass the IAG number and inode extent |
1473 | * index (within that IAG) from which the | 1473 | * index (within that IAG) from which the |
1474 | * extent is being released. These have been | 1474 | * extent is being released. These have been |
1475 | * passed to us in the iplist[1] and iplist[2]. | 1475 | * passed to us in the iplist[1] and iplist[2]. |
1476 | */ | 1476 | */ |
1477 | lrd->log.noredoinoext.iagnum = | 1477 | lrd->log.noredoinoext.iagnum = |
1478 | cpu_to_le32((u32) (size_t) cd->iplist[1]); | 1478 | cpu_to_le32((u32) (size_t) cd->iplist[1]); |
1479 | lrd->log.noredoinoext.inoext_idx = | 1479 | lrd->log.noredoinoext.inoext_idx = |
1480 | cpu_to_le32((u32) (size_t) cd->iplist[2]); | 1480 | cpu_to_le32((u32) (size_t) cd->iplist[2]); |
1481 | 1481 | ||
1482 | pxdlock = (struct pxd_lock *) & tlck->lock; | 1482 | pxdlock = (struct pxd_lock *) & tlck->lock; |
1483 | *pxd = pxdlock->pxd; | 1483 | *pxd = pxdlock->pxd; |
1484 | lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); | 1484 | lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); |
1485 | 1485 | ||
1486 | /* update bmap */ | 1486 | /* update bmap */ |
1487 | tlck->flag |= tlckUPDATEMAP; | 1487 | tlck->flag |= tlckUPDATEMAP; |
1488 | 1488 | ||
1489 | /* mark page as homeward bound */ | 1489 | /* mark page as homeward bound */ |
1490 | tlck->flag |= tlckWRITEPAGE; | 1490 | tlck->flag |= tlckWRITEPAGE; |
1491 | } else | 1491 | } else |
1492 | jfs_err("diLog: UFO type tlck:0x%p", tlck); | 1492 | jfs_err("diLog: UFO type tlck:0x%p", tlck); |
1493 | #ifdef _JFS_WIP | 1493 | #ifdef _JFS_WIP |
1494 | /* | 1494 | /* |
1495 | * alloc/free external EA extent | 1495 | * alloc/free external EA extent |
1496 | * | 1496 | * |
1497 | * a maplock for txUpdateMap() to update bPWMAP for alloc/free | 1497 | * a maplock for txUpdateMap() to update bPWMAP for alloc/free |
1498 | * of the extent has been formatted at txLock() time; | 1498 | * of the extent has been formatted at txLock() time; |
1499 | */ | 1499 | */ |
1500 | else { | 1500 | else { |
1501 | assert(tlck->type & tlckEA); | 1501 | assert(tlck->type & tlckEA); |
1502 | 1502 | ||
1503 | /* log LOG_UPDATEMAP for logredo() to update bmap for | 1503 | /* log LOG_UPDATEMAP for logredo() to update bmap for |
1504 | * alloc of new (and free of old) external EA extent; | 1504 | * alloc of new (and free of old) external EA extent; |
1505 | */ | 1505 | */ |
1506 | lrd->type = cpu_to_le16(LOG_UPDATEMAP); | 1506 | lrd->type = cpu_to_le16(LOG_UPDATEMAP); |
1507 | pxdlock = (struct pxd_lock *) & tlck->lock; | 1507 | pxdlock = (struct pxd_lock *) & tlck->lock; |
1508 | nlock = pxdlock->index; | 1508 | nlock = pxdlock->index; |
1509 | for (i = 0; i < nlock; i++, pxdlock++) { | 1509 | for (i = 0; i < nlock; i++, pxdlock++) { |
1510 | if (pxdlock->flag & mlckALLOCPXD) | 1510 | if (pxdlock->flag & mlckALLOCPXD) |
1511 | lrd->log.updatemap.type = | 1511 | lrd->log.updatemap.type = |
1512 | cpu_to_le16(LOG_ALLOCPXD); | 1512 | cpu_to_le16(LOG_ALLOCPXD); |
1513 | else | 1513 | else |
1514 | lrd->log.updatemap.type = | 1514 | lrd->log.updatemap.type = |
1515 | cpu_to_le16(LOG_FREEPXD); | 1515 | cpu_to_le16(LOG_FREEPXD); |
1516 | lrd->log.updatemap.nxd = cpu_to_le16(1); | 1516 | lrd->log.updatemap.nxd = cpu_to_le16(1); |
1517 | lrd->log.updatemap.pxd = pxdlock->pxd; | 1517 | lrd->log.updatemap.pxd = pxdlock->pxd; |
1518 | lrd->backchain = | 1518 | lrd->backchain = |
1519 | cpu_to_le32(lmLog(log, tblk, lrd, NULL)); | 1519 | cpu_to_le32(lmLog(log, tblk, lrd, NULL)); |
1520 | } | 1520 | } |
1521 | 1521 | ||
1522 | /* update bmap */ | 1522 | /* update bmap */ |
1523 | tlck->flag |= tlckUPDATEMAP; | 1523 | tlck->flag |= tlckUPDATEMAP; |
1524 | } | 1524 | } |
1525 | #endif /* _JFS_WIP */ | 1525 | #endif /* _JFS_WIP */ |
1526 | 1526 | ||
1527 | return rc; | 1527 | return rc; |
1528 | } | 1528 | } |
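
The iplist trick above is worth a note: diLog() receives the IAG number and the inode extent index through cd->iplist[1] and cd->iplist[2], which are pointer slots carrying plain integers; the (u32) (size_t) double cast recovers them portably before the little-endian conversion for the on-disk record. A minimal standalone sketch of the same pattern (all names below are demo stand-ins, not driver code; demo_cpu_to_le32 is a hand-rolled approximation of the kernel helper):

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

typedef uint32_t u32;

/* demo reimplementation: identity on little-endian hosts,
 * byte swap on big-endian ones */
static u32 demo_cpu_to_le32(u32 x)
{
	unsigned char b[4] = {
		x & 0xff, (x >> 8) & 0xff, (x >> 16) & 0xff, (x >> 24) & 0xff
	};
	u32 le;

	memcpy(&le, b, sizeof(le));
	return le;
}

int main(void)
{
	void *iplist[3] = { NULL, NULL, NULL };
	u32 iagnum, inoext_idx;

	/* the caller stores small integers, not pointers, in the slots */
	iplist[1] = (void *) (size_t) 7;	/* IAG number */
	iplist[2] = (void *) (size_t) 3;	/* extent index in that IAG */

	/* the callee recovers them with the same casts diLog() uses */
	iagnum = (u32) (size_t) iplist[1];
	inoext_idx = (u32) (size_t) iplist[2];

	printf("iagnum=%u inoext_idx=%u le=%08x\n",
	       iagnum, inoext_idx, demo_cpu_to_le32(iagnum));
	return 0;
}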

/*
 *	dataLog()
 *
 * function:	log data tlock
 */
static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
	    struct tlock * tlck)
{
	struct metapage *mp;
	pxd_t *pxd;

	mp = tlck->mp;

	/* initialize as REDOPAGE record format */
	lrd->log.redopage.type = cpu_to_le16(LOG_DATA);
	lrd->log.redopage.l2linesize = cpu_to_le16(L2DATASLOTSIZE);

	pxd = &lrd->log.redopage.pxd;

	/* log after-image for logredo(): */
	lrd->type = cpu_to_le16(LOG_REDOPAGE);

	if (jfs_dirtable_inline(tlck->ip)) {
		/*
		 * The table has been truncated, so we must have deleted
		 * the last entry; don't bother logging this
		 */
		mp->lid = 0;
		grab_metapage(mp);
		metapage_homeok(mp);
		discard_metapage(mp);
		tlck->mp = NULL;
		return 0;
	}

	PXDaddress(pxd, mp->index);
	PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits);

	lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));

	/* mark page as homeward bound */
	tlck->flag |= tlckWRITEPAGE;

	return 0;
}
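
dataLog() identifies the page to redo purely by its physical extent: the PXD receives the metapage's block address and a length computed as logical_size >> s_blocksize_bits, i.e. bytes converted to filesystem blocks. A simplified sketch of that arithmetic follows; the struct layout is illustrative only (the real pxd_t packs a 40-bit address and 24-bit length into 8 bytes), and the sample values are invented:

#include <stdint.h>
#include <stdio.h>
#include <assert.h>

typedef struct {
	uint64_t addr;	/* starting block address of the extent */
	uint32_t len;	/* extent length in filesystem blocks */
} demo_pxd_t;

static void demo_PXDaddress(demo_pxd_t *pxd, uint64_t addr) { pxd->addr = addr; }
static void demo_PXDlength(demo_pxd_t *pxd, uint32_t len)   { pxd->len = len; }

int main(void)
{
	demo_pxd_t pxd;
	uint64_t page_index = 1024;	/* stand-in for mp->index */
	uint64_t logical_size = 4096;	/* stand-in for mp->logical_size */
	unsigned blkbits = 9;		/* 512-byte blocks: s_blocksize_bits */

	/* same arithmetic as dataLog(): length in blocks, not bytes */
	demo_PXDaddress(&pxd, page_index);
	demo_PXDlength(&pxd, logical_size >> blkbits);

	assert(pxd.len == 8);	/* 4096 bytes / 512-byte blocks */
	printf("extent addr=%llu len=%u blocks\n",
	       (unsigned long long) pxd.addr, pxd.len);
	return 0;
}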

/*
 *	dtLog()
 *
 * function:	log dtree tlock and format maplock to update bmap;
 */
static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
	   struct tlock * tlck)
{
	struct metapage *mp;
	struct pxd_lock *pxdlock;
	pxd_t *pxd;

	mp = tlck->mp;

	/* initialize as REDOPAGE/NOREDOPAGE record format */
	lrd->log.redopage.type = cpu_to_le16(LOG_DTREE);
	lrd->log.redopage.l2linesize = cpu_to_le16(L2DTSLOTSIZE);

	pxd = &lrd->log.redopage.pxd;

	if (tlck->type & tlckBTROOT)
		lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT);

	/*
	 *	page extension via relocation: entry insertion;
	 *	page extension in-place: entry insertion;
	 *	new right page from page split, reinitialized in-line
	 *	root from root page split: entry insertion;
	 */
	if (tlck->type & (tlckNEW | tlckEXTEND)) {
		/* log after-image of the new page for logredo():
		 * mark log (LOG_NEW) for logredo() to initialize
		 * freelist and update bmap for alloc of the new page;
		 */
		lrd->type = cpu_to_le16(LOG_REDOPAGE);
		if (tlck->type & tlckEXTEND)
			lrd->log.redopage.type |= cpu_to_le16(LOG_EXTEND);
		else
			lrd->log.redopage.type |= cpu_to_le16(LOG_NEW);
		PXDaddress(pxd, mp->index);
		PXDlength(pxd,
			  mp->logical_size >> tblk->sb->s_blocksize_bits);
		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));

		/* format a maplock for txUpdateMap() to update bPMAP for
		 * alloc of the new page;
		 */
		if (tlck->type & tlckBTROOT)
			return;
		tlck->flag |= tlckUPDATEMAP;
		pxdlock = (struct pxd_lock *) & tlck->lock;
		pxdlock->flag = mlckALLOCPXD;
		pxdlock->pxd = *pxd;

		pxdlock->index = 1;

		/* mark page as homeward bound */
		tlck->flag |= tlckWRITEPAGE;
		return;
	}

	/*
	 *	entry insertion/deletion,
	 *	sibling page link update (old right page before split);
	 */
	if (tlck->type & (tlckENTRY | tlckRELINK)) {
		/* log after-image for logredo(): */
		lrd->type = cpu_to_le16(LOG_REDOPAGE);
		PXDaddress(pxd, mp->index);
		PXDlength(pxd,
			  mp->logical_size >> tblk->sb->s_blocksize_bits);
		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));

		/* mark page as homeward bound */
		tlck->flag |= tlckWRITEPAGE;
		return;
	}

	/*
	 *	page deletion: page has been invalidated
	 *	page relocation: source extent
	 *
	 *	a maplock for free of the page has been formatted
	 *	at txLock() time);
	 */
	if (tlck->type & (tlckFREE | tlckRELOCATE)) {
		/* log LOG_NOREDOPAGE of the deleted page for logredo()
		 * to start NoRedoPage filter and to update bmap for free
		 * of the deleted page
		 */
		lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
		pxdlock = (struct pxd_lock *) & tlck->lock;
		*pxd = pxdlock->pxd;
		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));

		/* a maplock for txUpdateMap() for free of the page
		 * has been formatted at txLock() time;
		 */
		tlck->flag |= tlckUPDATEMAP;
	}
	return;
}
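
To summarize dtLog()'s dispatch: each dtree tlock type maps to one log record shape plus, in some cases, a maplock. A hypothetical helper capturing the same order of tests (the flag values below are invented for the demo and do not match the real tlck* constants):

#include <stdio.h>

enum {
	demo_tlckNEW = 1, demo_tlckEXTEND = 2, demo_tlckENTRY = 4,
	demo_tlckRELINK = 8, demo_tlckFREE = 16, demo_tlckRELOCATE = 32
};

/* mirrors the if-chain in dtLog() above */
static const char *dt_record_for(int type)
{
	if (type & (demo_tlckNEW | demo_tlckEXTEND))
		return "LOG_REDOPAGE (LOG_NEW or LOG_EXTEND) + alloc maplock";
	if (type & (demo_tlckENTRY | demo_tlckRELINK))
		return "LOG_REDOPAGE (after-image only)";
	if (type & (demo_tlckFREE | demo_tlckRELOCATE))
		return "LOG_NOREDOPAGE + free maplock";
	return "no record";
}

int main(void)
{
	printf("%s\n", dt_record_for(demo_tlckENTRY));
	return 0;
}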

/*
 *	xtLog()
 *
 * function:	log xtree tlock and format maplock to update bmap;
 */
static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
	   struct tlock * tlck)
{
	struct inode *ip;
	struct metapage *mp;
	xtpage_t *p;
	struct xtlock *xtlck;
	struct maplock *maplock;
	struct xdlistlock *xadlock;
	struct pxd_lock *pxdlock;
	pxd_t *page_pxd;
	int next, lwm, hwm;

	ip = tlck->ip;
	mp = tlck->mp;

	/* initialize as REDOPAGE/NOREDOPAGE record format */
	lrd->log.redopage.type = cpu_to_le16(LOG_XTREE);
	lrd->log.redopage.l2linesize = cpu_to_le16(L2XTSLOTSIZE);

	page_pxd = &lrd->log.redopage.pxd;

	if (tlck->type & tlckBTROOT) {
		lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT);
		p = &JFS_IP(ip)->i_xtroot;
		if (S_ISDIR(ip->i_mode))
			lrd->log.redopage.type |=
			    cpu_to_le16(LOG_DIR_XTREE);
	} else
		p = (xtpage_t *) mp->data;
	next = le16_to_cpu(p->header.nextindex);

	xtlck = (struct xtlock *) & tlck->lock;

	maplock = (struct maplock *) & tlck->lock;
	xadlock = (struct xdlistlock *) maplock;

	/*
	 *	entry insertion/extension;
	 *	sibling page link update (old right page before split);
	 */
	if (tlck->type & (tlckNEW | tlckGROW | tlckRELINK)) {
		/* log after-image for logredo():
		 * logredo() will update bmap for alloc of new/extended
		 * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from
		 * after-image of XADlist;
		 * logredo() resets (XAD_NEW|XAD_EXTEND) flag when
		 * applying the after-image to the meta-data page.
		 */
		lrd->type = cpu_to_le16(LOG_REDOPAGE);
		PXDaddress(page_pxd, mp->index);
		PXDlength(page_pxd,
			  mp->logical_size >> tblk->sb->s_blocksize_bits);
		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));

		/* format a maplock for txUpdateMap() to update bPMAP
		 * for alloc of new/extended extents of XAD[lwm:next)
		 * from the page itself;
		 * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag.
		 */
		lwm = xtlck->lwm.offset;
		if (lwm == 0)
			lwm = XTPAGEMAXSLOT;

		if (lwm == next)
			goto out;
		if (lwm > next) {
			jfs_err("xtLog: lwm > next\n");
			goto out;
		}
		tlck->flag |= tlckUPDATEMAP;
		xadlock->flag = mlckALLOCXADLIST;
		xadlock->count = next - lwm;
		if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) {
			int i;
			pxd_t *pxd;
			/*
			 * Lazy commit may allow xtree to be modified before
			 * txUpdateMap runs.  Copy xad into linelock to
			 * preserve correct data.
			 *
			 * We can fit twice as many pxds as xads in the lock
			 */
			xadlock->flag = mlckALLOCPXDLIST;
			pxd = xadlock->xdlist = &xtlck->pxdlock;
			for (i = 0; i < xadlock->count; i++) {
				PXDaddress(pxd, addressXAD(&p->xad[lwm + i]));
				PXDlength(pxd, lengthXAD(&p->xad[lwm + i]));
				p->xad[lwm + i].flag &=
				    ~(XAD_NEW | XAD_EXTENDED);
				pxd++;
			}
		} else {
			/*
			 * xdlist will point into the inode's xtree, so ensure
			 * that the transaction is not committed lazily.
			 */
			xadlock->flag = mlckALLOCXADLIST;
			xadlock->xdlist = &p->xad[lwm];
			tblk->xflag &= ~COMMIT_LAZY;
		}
		jfs_info("xtLog: alloc ip:0x%p mp:0x%p tlck:0x%p lwm:%d "
			 "count:%d", tlck->ip, mp, tlck, lwm, xadlock->count);

		maplock->index = 1;

	      out:
		/* mark page as homeward bound */
		tlck->flag |= tlckWRITEPAGE;

		return;
	}

	/*
	 *	page deletion: file deletion/truncation (ref. xtTruncate())
	 *
	 * (page will be invalidated after log is written and bmap
	 * is updated from the page);
	 */
	if (tlck->type & tlckFREE) {
		/* LOG_NOREDOPAGE log for NoRedoPage filter:
		 * if page free from file delete, NoRedoFile filter from
		 * inode image of zero link count will subsume NoRedoPage
		 * filters for each page;
		 * if page free from file truncation, write NoRedoPage
		 * filter;
		 *
		 * update of block allocation map for the page itself:
		 * if page free from deletion and truncation, LOG_UPDATEMAP
		 * log for the page itself is generated from processing
		 * its parent page xad entries;
		 */
		/* if page free from file truncation, log LOG_NOREDOPAGE
		 * of the deleted page for logredo() to start NoRedoPage
		 * filter for the page;
		 */
		if (tblk->xflag & COMMIT_TRUNCATE) {
			/* write NOREDOPAGE for the page */
			lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
			PXDaddress(page_pxd, mp->index);
			PXDlength(page_pxd,
				  mp->logical_size >> tblk->sb->
				  s_blocksize_bits);
			lrd->backchain =
			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));

			if (tlck->type & tlckBTROOT) {
				/* Empty xtree must be logged */
				lrd->type = cpu_to_le16(LOG_REDOPAGE);
				lrd->backchain =
				    cpu_to_le32(lmLog(log, tblk, lrd, tlck));
			}
		}

		/* init LOG_UPDATEMAP of the freed extents
		 * XAD[XTENTRYSTART:hwm) from the deleted page itself
		 * for logredo() to update bmap;
		 */
		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
		lrd->log.updatemap.type = cpu_to_le16(LOG_FREEXADLIST);
		xtlck = (struct xtlock *) & tlck->lock;
		hwm = xtlck->hwm.offset;
		lrd->log.updatemap.nxd =
		    cpu_to_le16(hwm - XTENTRYSTART + 1);
		/* reformat linelock for lmLog() */
		xtlck->header.offset = XTENTRYSTART;
		xtlck->header.length = hwm - XTENTRYSTART + 1;
		xtlck->index = 1;
		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));

		/* format a maplock for txUpdateMap() to update bmap
		 * to free extents of XAD[XTENTRYSTART:hwm) from the
		 * deleted page itself;
		 */
		tlck->flag |= tlckUPDATEMAP;
		xadlock->count = hwm - XTENTRYSTART + 1;
		if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) {
			int i;
			pxd_t *pxd;
			/*
			 * Lazy commit may allow xtree to be modified before
			 * txUpdateMap runs.  Copy xad into linelock to
			 * preserve correct data.
			 *
			 * We can fit twice as many pxds as xads in the lock
			 */
			xadlock->flag = mlckFREEPXDLIST;
			pxd = xadlock->xdlist = &xtlck->pxdlock;
			for (i = 0; i < xadlock->count; i++) {
				PXDaddress(pxd,
					   addressXAD(&p->xad[XTENTRYSTART + i]));
				PXDlength(pxd,
					  lengthXAD(&p->xad[XTENTRYSTART + i]));
				pxd++;
			}
		} else {
			/*
			 * xdlist will point into the inode's xtree, so ensure
			 * that the transaction is not committed lazily.
			 */
			xadlock->flag = mlckFREEXADLIST;
			xadlock->xdlist = &p->xad[XTENTRYSTART];
			tblk->xflag &= ~COMMIT_LAZY;
		}
		jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d lwm:2",
			 tlck->ip, mp, xadlock->count);

		maplock->index = 1;

		/* mark page as invalid */
		if (((tblk->xflag & COMMIT_PWMAP) || S_ISDIR(ip->i_mode))
		    && !(tlck->type & tlckBTROOT))
			tlck->flag |= tlckFREEPAGE;
		/*
		   else (tblk->xflag & COMMIT_PMAP)
		   ? release the page;
		 */
		return;
	}

	/*
	 *	page/entry truncation: file truncation (ref. xtTruncate())
	 *
	 *	|----------+------+------+---------------|
	 *	|          |      |
	 *	|          |     hwm - hwm before truncation
	 *	|         next - truncation point
	 *	lwm - lwm before truncation
	 *	header ?
	 */
	if (tlck->type & tlckTRUNCATE) {
		pxd_t pxd;	/* truncated extent of xad */
		int twm;

		/*
		 * For truncation the entire linelock may be used, so it would
		 * be difficult to store xad list in linelock itself.
		 * Therefore, we'll just force transaction to be committed
		 * synchronously, so that xtree pages won't be changed before
		 * txUpdateMap runs.
		 */
		tblk->xflag &= ~COMMIT_LAZY;
		lwm = xtlck->lwm.offset;
		if (lwm == 0)
			lwm = XTPAGEMAXSLOT;
		hwm = xtlck->hwm.offset;
		twm = xtlck->twm.offset;

		/*
		 *	write log records
		 */
		/* log after-image for logredo():
		 *
		 * logredo() will update bmap for alloc of new/extended
		 * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from
		 * after-image of XADlist;
		 * logredo() resets (XAD_NEW|XAD_EXTEND) flag when
		 * applying the after-image to the meta-data page.
		 */
		lrd->type = cpu_to_le16(LOG_REDOPAGE);
		PXDaddress(page_pxd, mp->index);
		PXDlength(page_pxd,
			  mp->logical_size >> tblk->sb->s_blocksize_bits);
		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));

		/*
		 * truncate entry XAD[twm == next - 1]:
		 */
		if (twm == next - 1) {
			/* init LOG_UPDATEMAP for logredo() to update bmap for
			 * free of truncated delta extent of the truncated
			 * entry XAD[next - 1]:
			 * (xtlck->pxdlock = truncated delta extent);
			 */
			pxdlock = (struct pxd_lock *) & xtlck->pxdlock;
			/* assert(pxdlock->type & tlckTRUNCATE); */
			lrd->type = cpu_to_le16(LOG_UPDATEMAP);
			lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD);
			lrd->log.updatemap.nxd = cpu_to_le16(1);
			lrd->log.updatemap.pxd = pxdlock->pxd;
			pxd = pxdlock->pxd;	/* save to format maplock */
			lrd->backchain =
			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
		}

		/*
		 * free entries XAD[next:hwm]:
		 */
		if (hwm >= next) {
			/* init LOG_UPDATEMAP of the freed extents
			 * XAD[next:hwm] from the deleted page itself
			 * for logredo() to update bmap;
			 */
			lrd->type = cpu_to_le16(LOG_UPDATEMAP);
			lrd->log.updatemap.type =
			    cpu_to_le16(LOG_FREEXADLIST);
			xtlck = (struct xtlock *) & tlck->lock;
			hwm = xtlck->hwm.offset;
			lrd->log.updatemap.nxd =
			    cpu_to_le16(hwm - next + 1);
			/* reformat linelock for lmLog() */
			xtlck->header.offset = next;
			xtlck->header.length = hwm - next + 1;
			xtlck->index = 1;
			lrd->backchain =
			    cpu_to_le32(lmLog(log, tblk, lrd, tlck));
		}

		/*
		 *	format maplock(s) for txUpdateMap() to update bmap
		 */
		maplock->index = 0;

		/*
		 * allocate entries XAD[lwm:next):
		 */
		if (lwm < next) {
			/* format a maplock for txUpdateMap() to update bPMAP
			 * for alloc of new/extended extents of XAD[lwm:next)
			 * from the page itself;
			 * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag.
			 */
			tlck->flag |= tlckUPDATEMAP;
			xadlock->flag = mlckALLOCXADLIST;
			xadlock->count = next - lwm;
			xadlock->xdlist = &p->xad[lwm];

			jfs_info("xtLog: alloc ip:0x%p mp:0x%p count:%d "
				 "lwm:%d next:%d",
				 tlck->ip, mp, xadlock->count, lwm, next);
			maplock->index++;
			xadlock++;
		}

		/*
		 * truncate entry XAD[twm == next - 1]:
		 */
		if (twm == next - 1) {
			struct pxd_lock *pxdlock;

			/* format a maplock for txUpdateMap() to update bmap
			 * to free truncated delta extent of the truncated
			 * entry XAD[next - 1];
			 * (xtlck->pxdlock = truncated delta extent);
			 */
			tlck->flag |= tlckUPDATEMAP;
			pxdlock = (struct pxd_lock *) xadlock;
			pxdlock->flag = mlckFREEPXD;
			pxdlock->count = 1;
			pxdlock->pxd = pxd;

			jfs_info("xtLog: truncate ip:0x%p mp:0x%p count:%d "
				 "hwm:%d", ip, mp, pxdlock->count, hwm);
			maplock->index++;
			xadlock++;
		}

		/*
		 * free entries XAD[next:hwm]:
		 */
		if (hwm >= next) {
			/* format a maplock for txUpdateMap() to update bmap
			 * to free extents of XAD[next:hwm] from the deleted
			 * page itself;
			 */
			tlck->flag |= tlckUPDATEMAP;
			xadlock->flag = mlckFREEXADLIST;
			xadlock->count = hwm - next + 1;
			xadlock->xdlist = &p->xad[next];

			jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d "
				 "next:%d hwm:%d",
				 tlck->ip, mp, xadlock->count, next, hwm);
			maplock->index++;
		}

		/* mark page as homeward bound */
		tlck->flag |= tlckWRITEPAGE;
	}
	return;
}
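
The truncation case juggles four indices sketched in the diagram above: lwm and hwm are the low and high water marks before truncation, next is the truncation point, and twm marks a partially truncated entry. The same three conditions xtLog() tests decide which maplocks get formatted. A standalone sketch of just that bookkeeping (the sample indices are invented; in the driver they come from the xtlock):

#include <stdio.h>

static void truncate_ranges(int lwm, int twm, int next, int hwm)
{
	if (lwm < next)
		printf("alloc  XAD[%d:%d)  (new/extended extents)\n",
		       lwm, next);
	if (twm == next - 1)
		printf("free   delta of truncated entry XAD[%d]\n", twm);
	if (hwm >= next)
		printf("free   XAD[%d:%d]  (entries beyond new EOF)\n",
		       next, hwm);
}

int main(void)
{
	/* e.g. entries 2..9 existed; truncation keeps XAD[2:5) and
	 * entry 4 was shortened rather than dropped */
	truncate_ranges(2, 4, 5, 9);
	return 0;
}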

/*
 *	mapLog()
 *
 * function:	log from maplock of freed data extents;
 */
void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
	    struct tlock * tlck)
{
	struct pxd_lock *pxdlock;
	int i, nlock;
	pxd_t *pxd;

	/*
	 *	page relocation: free the source page extent
	 *
	 * a maplock for txUpdateMap() for free of the page
	 * has been formatted at txLock() time saving the src
	 * relocated page address;
	 */
	if (tlck->type & tlckRELOCATE) {
		/* log LOG_NOREDOPAGE of the old relocated page
		 * for logredo() to start NoRedoPage filter;
		 */
		lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
		pxdlock = (struct pxd_lock *) & tlck->lock;
		pxd = &lrd->log.redopage.pxd;
		*pxd = pxdlock->pxd;
		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));

		/* (N.B. currently, logredo() does NOT update bmap
		 * for free of the page itself for (LOG_XTREE|LOG_NOREDOPAGE);
		 * if page free from relocation, LOG_UPDATEMAP log is
		 * specifically generated now for logredo()
		 * to update bmap for free of src relocated page;
		 * (new flag LOG_RELOCATE may be introduced which will
		 * inform logredo() to start NoRedoPage filter and also
		 * update block allocation map at the same time, thus
		 * avoiding an extra log write);
		 */
		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
		lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD);
		lrd->log.updatemap.nxd = cpu_to_le16(1);
		lrd->log.updatemap.pxd = pxdlock->pxd;
		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));

		/* a maplock for txUpdateMap() for free of the page
		 * has been formatted at txLock() time;
		 */
		tlck->flag |= tlckUPDATEMAP;
		return;
	}
	/*
	 * Otherwise it's not a relocate request
	 */
	else {
		/* log LOG_UPDATEMAP for logredo() to update bmap for
		 * free of truncated/relocated delta extent of the data;
		 * e.g.: external EA extent, relocated/truncated extent
		 * from xtTailgate();
		 */
		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
		pxdlock = (struct pxd_lock *) & tlck->lock;
		nlock = pxdlock->index;
		for (i = 0; i < nlock; i++, pxdlock++) {
			if (pxdlock->flag & mlckALLOCPXD)
				lrd->log.updatemap.type =
				    cpu_to_le16(LOG_ALLOCPXD);
			else
				lrd->log.updatemap.type =
				    cpu_to_le16(LOG_FREEPXD);
			lrd->log.updatemap.nxd = cpu_to_le16(1);
			lrd->log.updatemap.pxd = pxdlock->pxd;
			lrd->backchain =
			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
			jfs_info("mapLog: xaddr:0x%lx xlen:0x%x",
				 (ulong) addressPXD(&pxdlock->pxd),
				 lengthPXD(&pxdlock->pxd));
		}

		/* update bmap */
		tlck->flag |= tlckUPDATEMAP;
	}
}

/*
 *	txEA()
 *
 * function:	acquire maplock for EA/ACL extents or
 *		set COMMIT_INLINE flag;
 */
void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea)
{
	struct tlock *tlck = NULL;
	struct pxd_lock *maplock = NULL, *pxdlock = NULL;

	/*
	 * format maplock for alloc of new EA extent
	 */
	if (newea) {
		/* Since the newea could be a completely zeroed entry we need to
		 * check for the two flags which indicate we should actually
		 * commit new EA data
		 */
		if (newea->flag & DXD_EXTENT) {
			tlck = txMaplock(tid, ip, tlckMAP);
			maplock = (struct pxd_lock *) & tlck->lock;
			pxdlock = (struct pxd_lock *) maplock;
			pxdlock->flag = mlckALLOCPXD;
			PXDaddress(&pxdlock->pxd, addressDXD(newea));
			PXDlength(&pxdlock->pxd, lengthDXD(newea));
			pxdlock++;
			maplock->index = 1;
		} else if (newea->flag & DXD_INLINE) {
			tlck = NULL;

			set_cflag(COMMIT_Inlineea, ip);
		}
	}

	/*
	 * format maplock for free of old EA extent
	 */
	if (!test_cflag(COMMIT_Nolink, ip) && oldea->flag & DXD_EXTENT) {
		if (tlck == NULL) {
			tlck = txMaplock(tid, ip, tlckMAP);
			maplock = (struct pxd_lock *) & tlck->lock;
			pxdlock = (struct pxd_lock *) maplock;
			maplock->index = 0;
		}
		pxdlock->flag = mlckFREEPXD;
		PXDaddress(&pxdlock->pxd, addressDXD(oldea));
		PXDlength(&pxdlock->pxd, lengthDXD(oldea));
		maplock->index++;
	}
}
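
txEA() packs up to two pxd_lock entries back to back in a single tlock: an alloc entry for the new EA extent, then (advancing pxdlock past it) a free entry for the old one, with maplock->index counting how many are valid. A sketch of that layout with simplified types and invented block addresses:

#include <stdint.h>
#include <stdio.h>

enum { demo_mlckALLOCPXD = 1, demo_mlckFREEPXD = 2 };	/* demo values */

struct demo_pxd_lock {
	int flag;
	uint64_t addr;
	uint32_t len;
};

int main(void)
{
	struct demo_pxd_lock locks[2];
	int index = 0;		/* plays the role of maplock->index */

	/* a new EA extent is present: format the alloc entry first */
	locks[index] = (struct demo_pxd_lock){ demo_mlckALLOCPXD, 5000, 4 };
	index++;

	/* an old EA extent is present: format the free entry after it */
	locks[index] = (struct demo_pxd_lock){ demo_mlckFREEPXD, 4200, 4 };
	index++;

	printf("maplock->index = %d\n", index);
	return 0;
}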

/*
 *	txForce()
 *
 * function: synchronously write pages locked by transaction
 *	     after txLog() but before txUpdateMap();
 */
void txForce(struct tblock * tblk)
{
	struct tlock *tlck;
	lid_t lid, next;
	struct metapage *mp;

	/*
	 * reverse the order of transaction tlocks in
	 * careful update order of address index pages
	 * (right to left, bottom up)
	 */
	tlck = lid_to_tlock(tblk->next);
	lid = tlck->next;
	tlck->next = 0;
	while (lid) {
		tlck = lid_to_tlock(lid);
		next = tlck->next;
		tlck->next = tblk->next;
		tblk->next = lid;
		lid = next;
	}

	/*
	 * synchronously write the page, and
	 * hold the page for txUpdateMap();
	 */
	for (lid = tblk->next; lid; lid = next) {
		tlck = lid_to_tlock(lid);
		next = tlck->next;

		if ((mp = tlck->mp) != NULL &&
		    (tlck->type & tlckBTROOT) == 0) {
			assert(mp->xflag & COMMIT_PAGE);

			if (tlck->flag & tlckWRITEPAGE) {
				tlck->flag &= ~tlckWRITEPAGE;

				/* do not release page to freelist */
				force_metapage(mp);
#if 0
				/*
				 * The "right" thing to do here is to
				 * synchronously write the metadata.
				 * With the current implementation this
				 * is hard since write_metapage requires
				 * us to kunmap & remap the page.  If we
				 * have tlocks pointing into the metadata
				 * pages, we don't want to do this.  I think
				 * we can get by with synchronously writing
				 * the pages when they are released.
				 */
				assert(mp->nohomeok);
				set_bit(META_dirty, &mp->flag);
				set_bit(META_sync, &mp->flag);
#endif
			}
		}
	}
}
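
The reversal loop at the top of txForce() is an in-place reversal of a list linked by lids, small indices into a global tlock table, rather than by pointers; lid 0 terminates the chain. A self-contained sketch with the same loop shape (the table size and lid values are demo choices):

#include <stdio.h>

#define NTLOCK 8

struct demo_tlock { unsigned short next; };
static struct demo_tlock table[NTLOCK];

static void reverse(unsigned short *head)
{
	/* identical shape to the while loop in txForce() */
	unsigned short lid = table[*head].next;

	table[*head].next = 0;	/* old head becomes the tail */
	while (lid) {
		unsigned short next = table[lid].next;

		table[lid].next = *head;	/* push lid to the front */
		*head = lid;
		lid = next;
	}
}

int main(void)
{
	unsigned short head = 1;

	/* chain 1 -> 2 -> 3 -> end (lid 0 terminates the list) */
	table[1].next = 2;
	table[2].next = 3;
	table[3].next = 0;

	reverse(&head);

	for (unsigned short lid = head; lid; lid = table[lid].next)
		printf("%hu ", lid);	/* prints: 3 2 1 */
	printf("\n");
	return 0;
}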
2269 | 2269 | ||
2270 | /* | 2270 | /* |
2271 | * txUpdateMap() | 2271 | * txUpdateMap() |
2272 | * | 2272 | * |
2273 | * function: update persistent allocation map (and working map | 2273 | * function: update persistent allocation map (and working map |
2274 | * if appropriate); | 2274 | * if appropriate); |
2275 | * | 2275 | * |
2276 | * parameter: | 2276 | * parameter: |
2277 | */ | 2277 | */ |
2278 | static void txUpdateMap(struct tblock * tblk) | 2278 | static void txUpdateMap(struct tblock * tblk) |
2279 | { | 2279 | { |
2280 | struct inode *ip; | 2280 | struct inode *ip; |
2281 | struct inode *ipimap; | 2281 | struct inode *ipimap; |
2282 | lid_t lid; | 2282 | lid_t lid; |
2283 | struct tlock *tlck; | 2283 | struct tlock *tlck; |
2284 | struct maplock *maplock; | 2284 | struct maplock *maplock; |
2285 | struct pxd_lock pxdlock; | 2285 | struct pxd_lock pxdlock; |
2286 | int maptype; | 2286 | int maptype; |
2287 | int k, nlock; | 2287 | int k, nlock; |
2288 | struct metapage *mp = NULL; | 2288 | struct metapage *mp = NULL; |
2289 | 2289 | ||
2290 | ipimap = JFS_SBI(tblk->sb)->ipimap; | 2290 | ipimap = JFS_SBI(tblk->sb)->ipimap; |
2291 | 2291 | ||
2292 | maptype = (tblk->xflag & COMMIT_PMAP) ? COMMIT_PMAP : COMMIT_PWMAP; | 2292 | maptype = (tblk->xflag & COMMIT_PMAP) ? COMMIT_PMAP : COMMIT_PWMAP; |
2293 | 2293 | ||
2294 | 2294 | ||
2295 | /* | 2295 | /* |
2296 | * update block allocation map | 2296 | * update block allocation map |
2297 | * | 2297 | * |
2298 | * update allocation state in pmap (and wmap) and | 2298 | * update allocation state in pmap (and wmap) and |
2299 | * update lsn of the pmap page; | 2299 | * update lsn of the pmap page; |
2300 | */ | 2300 | */ |
2301 | /* | 2301 | /* |
2302 | * scan each tlock/page of transaction for block allocation/free: | 2302 | * scan each tlock/page of transaction for block allocation/free: |
2303 | * | 2303 | * |
2304 | * for each tlock/page of transaction, update map. | 2304 | * for each tlock/page of transaction, update map. |
2305 | * ? are there tlock for pmap and pwmap at the same time ? | 2305 | * ? are there tlock for pmap and pwmap at the same time ? |
2306 | */ | 2306 | */ |
2307 | for (lid = tblk->next; lid; lid = tlck->next) { | 2307 | for (lid = tblk->next; lid; lid = tlck->next) { |
2308 | tlck = lid_to_tlock(lid); | 2308 | tlck = lid_to_tlock(lid); |
2309 | 2309 | ||
2310 | if ((tlck->flag & tlckUPDATEMAP) == 0) | 2310 | if ((tlck->flag & tlckUPDATEMAP) == 0) |
2311 | continue; | 2311 | continue; |
2312 | 2312 | ||
2313 | if (tlck->flag & tlckFREEPAGE) { | 2313 | if (tlck->flag & tlckFREEPAGE) { |
2314 | /* | 2314 | /* |
2315 | * Another thread may attempt to reuse freed space | 2315 | * Another thread may attempt to reuse freed space |
2316 | * immediately, so we want to get rid of the metapage | 2316 | * immediately, so we want to get rid of the metapage |
2317 | * before anyone else has a chance to get it. | 2317 | * before anyone else has a chance to get it. |
2318 | * Lock metapage, update maps, then invalidate | 2318 | * Lock metapage, update maps, then invalidate |
2319 | * the metapage. | 2319 | * the metapage. |
2320 | */ | 2320 | */ |
2321 | mp = tlck->mp; | 2321 | mp = tlck->mp; |
2322 | ASSERT(mp->xflag & COMMIT_PAGE); | 2322 | ASSERT(mp->xflag & COMMIT_PAGE); |
2323 | grab_metapage(mp); | 2323 | grab_metapage(mp); |
2324 | } | 2324 | } |
2325 | 2325 | ||
2326 | /* | 2326 | /* |
2327 | * extent list: | 2327 | * extent list: |
2328 | * . in-line PXD list: | 2328 | * . in-line PXD list: |
2329 | * . out-of-line XAD list: | 2329 | * . out-of-line XAD list: |
2330 | */ | 2330 | */ |
2331 | maplock = (struct maplock *) & tlck->lock; | 2331 | maplock = (struct maplock *) & tlck->lock; |
2332 | nlock = maplock->index; | 2332 | nlock = maplock->index; |
2333 | 2333 | ||
2334 | for (k = 0; k < nlock; k++, maplock++) { | 2334 | for (k = 0; k < nlock; k++, maplock++) { |
2335 | /* | 2335 | /* |
2336 | * allocate blocks in persistent map: | 2336 | * allocate blocks in persistent map: |
2337 | * | 2337 | * |
2338 | * blocks have been allocated from wmap at alloc time; | 2338 | * blocks have been allocated from wmap at alloc time; |
2339 | */ | 2339 | */ |
2340 | if (maplock->flag & mlckALLOC) { | 2340 | if (maplock->flag & mlckALLOC) { |
2341 | txAllocPMap(ipimap, maplock, tblk); | 2341 | txAllocPMap(ipimap, maplock, tblk); |
2342 | } | 2342 | } |
2343 | /* | 2343 | /* |
2344 | * free blocks in persistent and working map: | 2344 | * free blocks in persistent and working map: |
2345 | * blocks will be freed in pmap and then in wmap; | 2345 | * blocks will be freed in pmap and then in wmap; |
2346 | * | 2346 | * |
2347 | * ? tblock specifies the PMAP/PWMAP based upon | 2347 | * ? tblock specifies the PMAP/PWMAP based upon |
2348 | * transaction | 2348 | * transaction |
2349 | * | 2349 | * |
2350 | * free blocks in persistent map: | 2350 | * free blocks in persistent map: |
2351 | * blocks will be freed from wmap at last reference | 2351 | * blocks will be freed from wmap at last reference |
2352 | * release of the object for regular files; | 2352 | * release of the object for regular files; |
2353 | * | 2353 | * |
2354 | * Always free blocks from both persistent & working | 2354 | * Always free blocks from both persistent & working |
2355 | * maps for directories | 2355 | * maps for directories |
2356 | */ | 2356 | */ |
2357 | else { /* (maplock->flag & mlckFREE) */ | 2357 | else { /* (maplock->flag & mlckFREE) */ |
2358 | 2358 | ||
2359 | if (S_ISDIR(tlck->ip->i_mode)) | 2359 | if (S_ISDIR(tlck->ip->i_mode)) |
2360 | txFreeMap(ipimap, maplock, | 2360 | txFreeMap(ipimap, maplock, |
2361 | tblk, COMMIT_PWMAP); | 2361 | tblk, COMMIT_PWMAP); |
2362 | else | 2362 | else |
2363 | txFreeMap(ipimap, maplock, | 2363 | txFreeMap(ipimap, maplock, |
2364 | tblk, maptype); | 2364 | tblk, maptype); |
2365 | } | 2365 | } |
2366 | } | 2366 | } |
2367 | if (tlck->flag & tlckFREEPAGE) { | 2367 | if (tlck->flag & tlckFREEPAGE) { |
2368 | if (!(tblk->flag & tblkGC_LAZY)) { | 2368 | if (!(tblk->flag & tblkGC_LAZY)) { |
2369 | /* This is equivalent to txRelease */ | 2369 | /* This is equivalent to txRelease */ |
2370 | ASSERT(mp->lid == lid); | 2370 | ASSERT(mp->lid == lid); |
2371 | tlck->mp->lid = 0; | 2371 | tlck->mp->lid = 0; |
2372 | } | 2372 | } |
2373 | assert(mp->nohomeok == 1); | 2373 | assert(mp->nohomeok == 1); |
2374 | metapage_homeok(mp); | 2374 | metapage_homeok(mp); |
2375 | discard_metapage(mp); | 2375 | discard_metapage(mp); |
2376 | tlck->mp = NULL; | 2376 | tlck->mp = NULL; |
2377 | } | 2377 | } |
2378 | } | 2378 | } |
2379 | /* | 2379 | /* |
2380 | * update inode allocation map | 2380 | * update inode allocation map |
2381 | * | 2381 | * |
2382 | * update allocation state in pmap and | 2382 | * update allocation state in pmap and |
2383 | * update lsn of the pmap page; | 2383 | * update lsn of the pmap page; |
2384 | * update in-memory inode flag/state | 2384 | * update in-memory inode flag/state |
2385 | * | 2385 | * |
2386 | * unlock mapper/write lock | 2386 | * unlock mapper/write lock |
2387 | */ | 2387 | */ |
2388 | if (tblk->xflag & COMMIT_CREATE) { | 2388 | if (tblk->xflag & COMMIT_CREATE) { |
2389 | diUpdatePMap(ipimap, tblk->ino, FALSE, tblk); | 2389 | diUpdatePMap(ipimap, tblk->ino, FALSE, tblk); |
2390 | ipimap->i_state |= I_DIRTY; | 2390 | ipimap->i_state |= I_DIRTY; |
2391 | /* update persistent block allocation map | 2391 | /* update persistent block allocation map |
2392 | * for the allocation of inode extent; | 2392 | * for the allocation of inode extent; |
2393 | */ | 2393 | */ |
2394 | pxdlock.flag = mlckALLOCPXD; | 2394 | pxdlock.flag = mlckALLOCPXD; |
2395 | pxdlock.pxd = tblk->u.ixpxd; | 2395 | pxdlock.pxd = tblk->u.ixpxd; |
2396 | pxdlock.index = 1; | 2396 | pxdlock.index = 1; |
2397 | txAllocPMap(ipimap, (struct maplock *) & pxdlock, tblk); | 2397 | txAllocPMap(ipimap, (struct maplock *) & pxdlock, tblk); |
2398 | } else if (tblk->xflag & COMMIT_DELETE) { | 2398 | } else if (tblk->xflag & COMMIT_DELETE) { |
2399 | ip = tblk->u.ip; | 2399 | ip = tblk->u.ip; |
2400 | diUpdatePMap(ipimap, ip->i_ino, TRUE, tblk); | 2400 | diUpdatePMap(ipimap, ip->i_ino, TRUE, tblk); |
2401 | ipimap->i_state |= I_DIRTY; | 2401 | ipimap->i_state |= I_DIRTY; |
2402 | iput(ip); | 2402 | iput(ip); |
2403 | } | 2403 | } |
2404 | } | 2404 | } |
2405 | 2405 | ||
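txUpdateMap() is where JFS's two allocation maps meet: the working map (wmap) tracks in-memory allocation state and was already updated when the blocks were handed out, while the persistent map (pmap) tracks committed state and is only brought up to date here, at commit time. Below is a self-contained sketch of the dispatch rule the tlock loop implements (hypothetical names, not the in-tree API): allocations touch only the pmap, since the wmap was updated at allocation time, and frees touch the pmap, the wmap, or both depending on the commit type, with directories always freeing from both.

        /* Illustrative sketch only; hypothetical names, not JFS code. */
        #include <stdio.h>

        enum maptype { MAP_P, MAP_W, MAP_PW };  /* pmap / wmap / both */

        static void free_blocks(enum maptype t, int is_dir)
        {
                if (is_dir)             /* directories free both maps */
                        t = MAP_PW;
                if (t == MAP_P || t == MAP_PW)
                        printf("  free from persistent map (committed state)\n");
                if (t == MAP_W || t == MAP_PW)
                        printf("  free from working map (in-memory state)\n");
        }

        int main(void)
        {
                printf("regular file, lazy tx:\n");
                free_blocks(MAP_P, 0);  /* wmap free deferred to last iput */
                printf("directory, lazy tx:\n");
                free_blocks(MAP_P, 1);
                return 0;
        }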
2406 | /* | 2406 | /* |
2407 | * txAllocPMap() | 2407 | * txAllocPMap() |
2408 | * | 2408 | * |
2409 | * function: allocate from persistent map; | 2409 | * function: allocate from persistent map; |
2410 | * | 2410 | * |
2411 | * parameter: | 2411 | * parameter: |
2412 | * ipbmap - | 2412 | * ipbmap - |
2413 | * malock - | 2413 | * malock - |
2414 | * xad list: | 2414 | * xad list: |
2415 | * pxd: | 2415 | * pxd: |
2416 | * | 2416 | * |
2417 | * maptype - | 2417 | * maptype - |
2418 | * allocate from persistent map; | 2418 | * allocate from persistent map; |
2419 | * free from persistent map; | 2419 | * free from persistent map; |
2420 | * (e.g., tmp file - free from working map at release | 2420 | * (e.g., tmp file - free from working map at release |
2421 | * of last reference); | 2421 | * of last reference); |
2422 | * free from persistent and working map; | 2422 | * free from persistent and working map; |
2423 | * | 2423 | * |
2424 | * lsn - log sequence number; | 2424 | * lsn - log sequence number; |
2425 | */ | 2425 | */ |
2426 | static void txAllocPMap(struct inode *ip, struct maplock * maplock, | 2426 | static void txAllocPMap(struct inode *ip, struct maplock * maplock, |
2427 | struct tblock * tblk) | 2427 | struct tblock * tblk) |
2428 | { | 2428 | { |
2429 | struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; | 2429 | struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; |
2430 | struct xdlistlock *xadlistlock; | 2430 | struct xdlistlock *xadlistlock; |
2431 | xad_t *xad; | 2431 | xad_t *xad; |
2432 | s64 xaddr; | 2432 | s64 xaddr; |
2433 | int xlen; | 2433 | int xlen; |
2434 | struct pxd_lock *pxdlock; | 2434 | struct pxd_lock *pxdlock; |
2435 | struct xdlistlock *pxdlistlock; | 2435 | struct xdlistlock *pxdlistlock; |
2436 | pxd_t *pxd; | 2436 | pxd_t *pxd; |
2437 | int n; | 2437 | int n; |
2438 | 2438 | ||
2439 | /* | 2439 | /* |
2440 | * allocate from persistent map; | 2440 | * allocate from persistent map; |
2441 | */ | 2441 | */ |
2442 | if (maplock->flag & mlckALLOCXADLIST) { | 2442 | if (maplock->flag & mlckALLOCXADLIST) { |
2443 | xadlistlock = (struct xdlistlock *) maplock; | 2443 | xadlistlock = (struct xdlistlock *) maplock; |
2444 | xad = xadlistlock->xdlist; | 2444 | xad = xadlistlock->xdlist; |
2445 | for (n = 0; n < xadlistlock->count; n++, xad++) { | 2445 | for (n = 0; n < xadlistlock->count; n++, xad++) { |
2446 | if (xad->flag & (XAD_NEW | XAD_EXTENDED)) { | 2446 | if (xad->flag & (XAD_NEW | XAD_EXTENDED)) { |
2447 | xaddr = addressXAD(xad); | 2447 | xaddr = addressXAD(xad); |
2448 | xlen = lengthXAD(xad); | 2448 | xlen = lengthXAD(xad); |
2449 | dbUpdatePMap(ipbmap, FALSE, xaddr, | 2449 | dbUpdatePMap(ipbmap, FALSE, xaddr, |
2450 | (s64) xlen, tblk); | 2450 | (s64) xlen, tblk); |
2451 | xad->flag &= ~(XAD_NEW | XAD_EXTENDED); | 2451 | xad->flag &= ~(XAD_NEW | XAD_EXTENDED); |
2452 | jfs_info("allocPMap: xaddr:0x%lx xlen:%d", | 2452 | jfs_info("allocPMap: xaddr:0x%lx xlen:%d", |
2453 | (ulong) xaddr, xlen); | 2453 | (ulong) xaddr, xlen); |
2454 | } | 2454 | } |
2455 | } | 2455 | } |
2456 | } else if (maplock->flag & mlckALLOCPXD) { | 2456 | } else if (maplock->flag & mlckALLOCPXD) { |
2457 | pxdlock = (struct pxd_lock *) maplock; | 2457 | pxdlock = (struct pxd_lock *) maplock; |
2458 | xaddr = addressPXD(&pxdlock->pxd); | 2458 | xaddr = addressPXD(&pxdlock->pxd); |
2459 | xlen = lengthPXD(&pxdlock->pxd); | 2459 | xlen = lengthPXD(&pxdlock->pxd); |
2460 | dbUpdatePMap(ipbmap, FALSE, xaddr, (s64) xlen, tblk); | 2460 | dbUpdatePMap(ipbmap, FALSE, xaddr, (s64) xlen, tblk); |
2461 | jfs_info("allocPMap: xaddr:0x%lx xlen:%d", (ulong) xaddr, xlen); | 2461 | jfs_info("allocPMap: xaddr:0x%lx xlen:%d", (ulong) xaddr, xlen); |
2462 | } else { /* (maplock->flag & mlckALLOCPXDLIST) */ | 2462 | } else { /* (maplock->flag & mlckALLOCPXDLIST) */ |
2463 | 2463 | ||
2464 | pxdlistlock = (struct xdlistlock *) maplock; | 2464 | pxdlistlock = (struct xdlistlock *) maplock; |
2465 | pxd = pxdlistlock->xdlist; | 2465 | pxd = pxdlistlock->xdlist; |
2466 | for (n = 0; n < pxdlistlock->count; n++, pxd++) { | 2466 | for (n = 0; n < pxdlistlock->count; n++, pxd++) { |
2467 | xaddr = addressPXD(pxd); | 2467 | xaddr = addressPXD(pxd); |
2468 | xlen = lengthPXD(pxd); | 2468 | xlen = lengthPXD(pxd); |
2469 | dbUpdatePMap(ipbmap, FALSE, xaddr, (s64) xlen, | 2469 | dbUpdatePMap(ipbmap, FALSE, xaddr, (s64) xlen, |
2470 | tblk); | 2470 | tblk); |
2471 | jfs_info("allocPMap: xaddr:0x%lx xlen:%d", | 2471 | jfs_info("allocPMap: xaddr:0x%lx xlen:%d", |
2472 | (ulong) xaddr, xlen); | 2472 | (ulong) xaddr, xlen); |
2473 | } | 2473 | } |
2474 | } | 2474 | } |
2475 | } | 2475 | } |
2476 | 2476 | ||
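The simplest caller of txAllocPMap() is the COMMIT_CREATE branch of txUpdateMap() above, which builds a single-entry pxd_lock on the stack for the newly allocated inode extent. Condensed from that code:

        struct pxd_lock pxdlock;

        pxdlock.flag  = mlckALLOCPXD;           /* one in-line PXD */
        pxdlock.pxd   = tblk->u.ixpxd;          /* the new inode extent */
        pxdlock.index = 1;                      /* single entry */
        txAllocPMap(ipimap, (struct maplock *) &pxdlock, tblk);

All three maplock flavors (XAD list, single PXD, PXD list) end at dbUpdatePMap() with the free flag FALSE; only the iteration over the extent list differs.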
2477 | /* | 2477 | /* |
2478 | * txFreeMap() | 2478 | * txFreeMap() |
2479 | * | 2479 | * |
2480 | * function: free from persistent and/or working map; | 2480 | * function: free from persistent and/or working map; |
2481 | * | 2481 | * |
2482 | * todo: optimization | 2482 | * todo: optimization |
2483 | */ | 2483 | */ |
2484 | void txFreeMap(struct inode *ip, | 2484 | void txFreeMap(struct inode *ip, |
2485 | struct maplock * maplock, struct tblock * tblk, int maptype) | 2485 | struct maplock * maplock, struct tblock * tblk, int maptype) |
2486 | { | 2486 | { |
2487 | struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; | 2487 | struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; |
2488 | struct xdlistlock *xadlistlock; | 2488 | struct xdlistlock *xadlistlock; |
2489 | xad_t *xad; | 2489 | xad_t *xad; |
2490 | s64 xaddr; | 2490 | s64 xaddr; |
2491 | int xlen; | 2491 | int xlen; |
2492 | struct pxd_lock *pxdlock; | 2492 | struct pxd_lock *pxdlock; |
2493 | struct xdlistlock *pxdlistlock; | 2493 | struct xdlistlock *pxdlistlock; |
2494 | pxd_t *pxd; | 2494 | pxd_t *pxd; |
2495 | int n; | 2495 | int n; |
2496 | 2496 | ||
2497 | jfs_info("txFreeMap: tblk:0x%p maplock:0x%p maptype:0x%x", | 2497 | jfs_info("txFreeMap: tblk:0x%p maplock:0x%p maptype:0x%x", |
2498 | tblk, maplock, maptype); | 2498 | tblk, maplock, maptype); |
2499 | 2499 | ||
2500 | /* | 2500 | /* |
2501 | * free from persistent map; | 2501 | * free from persistent map; |
2502 | */ | 2502 | */ |
2503 | if (maptype == COMMIT_PMAP || maptype == COMMIT_PWMAP) { | 2503 | if (maptype == COMMIT_PMAP || maptype == COMMIT_PWMAP) { |
2504 | if (maplock->flag & mlckFREEXADLIST) { | 2504 | if (maplock->flag & mlckFREEXADLIST) { |
2505 | xadlistlock = (struct xdlistlock *) maplock; | 2505 | xadlistlock = (struct xdlistlock *) maplock; |
2506 | xad = xadlistlock->xdlist; | 2506 | xad = xadlistlock->xdlist; |
2507 | for (n = 0; n < xadlistlock->count; n++, xad++) { | 2507 | for (n = 0; n < xadlistlock->count; n++, xad++) { |
2508 | if (!(xad->flag & XAD_NEW)) { | 2508 | if (!(xad->flag & XAD_NEW)) { |
2509 | xaddr = addressXAD(xad); | 2509 | xaddr = addressXAD(xad); |
2510 | xlen = lengthXAD(xad); | 2510 | xlen = lengthXAD(xad); |
2511 | dbUpdatePMap(ipbmap, TRUE, xaddr, | 2511 | dbUpdatePMap(ipbmap, TRUE, xaddr, |
2512 | (s64) xlen, tblk); | 2512 | (s64) xlen, tblk); |
2513 | jfs_info("freePMap: xaddr:0x%lx " | 2513 | jfs_info("freePMap: xaddr:0x%lx " |
2514 | "xlen:%d", | 2514 | "xlen:%d", |
2515 | (ulong) xaddr, xlen); | 2515 | (ulong) xaddr, xlen); |
2516 | } | 2516 | } |
2517 | } | 2517 | } |
2518 | } else if (maplock->flag & mlckFREEPXD) { | 2518 | } else if (maplock->flag & mlckFREEPXD) { |
2519 | pxdlock = (struct pxd_lock *) maplock; | 2519 | pxdlock = (struct pxd_lock *) maplock; |
2520 | xaddr = addressPXD(&pxdlock->pxd); | 2520 | xaddr = addressPXD(&pxdlock->pxd); |
2521 | xlen = lengthPXD(&pxdlock->pxd); | 2521 | xlen = lengthPXD(&pxdlock->pxd); |
2522 | dbUpdatePMap(ipbmap, TRUE, xaddr, (s64) xlen, | 2522 | dbUpdatePMap(ipbmap, TRUE, xaddr, (s64) xlen, |
2523 | tblk); | 2523 | tblk); |
2524 | jfs_info("freePMap: xaddr:0x%lx xlen:%d", | 2524 | jfs_info("freePMap: xaddr:0x%lx xlen:%d", |
2525 | (ulong) xaddr, xlen); | 2525 | (ulong) xaddr, xlen); |
2526 | } else { /* (maplock->flag & mlckFREEPXDLIST) */ | 2526 | } else { /* (maplock->flag & mlckFREEPXDLIST) */ |
2527 | 2527 | ||
2528 | pxdlistlock = (struct xdlistlock *) maplock; | 2528 | pxdlistlock = (struct xdlistlock *) maplock; |
2529 | pxd = pxdlistlock->xdlist; | 2529 | pxd = pxdlistlock->xdlist; |
2530 | for (n = 0; n < pxdlistlock->count; n++, pxd++) { | 2530 | for (n = 0; n < pxdlistlock->count; n++, pxd++) { |
2531 | xaddr = addressPXD(pxd); | 2531 | xaddr = addressPXD(pxd); |
2532 | xlen = lengthPXD(pxd); | 2532 | xlen = lengthPXD(pxd); |
2533 | dbUpdatePMap(ipbmap, TRUE, xaddr, | 2533 | dbUpdatePMap(ipbmap, TRUE, xaddr, |
2534 | (s64) xlen, tblk); | 2534 | (s64) xlen, tblk); |
2535 | jfs_info("freePMap: xaddr:0x%lx xlen:%d", | 2535 | jfs_info("freePMap: xaddr:0x%lx xlen:%d", |
2536 | (ulong) xaddr, xlen); | 2536 | (ulong) xaddr, xlen); |
2537 | } | 2537 | } |
2538 | } | 2538 | } |
2539 | } | 2539 | } |
2540 | 2540 | ||
2541 | /* | 2541 | /* |
2542 | * free from working map; | 2542 | * free from working map; |
2543 | */ | 2543 | */ |
2544 | if (maptype == COMMIT_PWMAP || maptype == COMMIT_WMAP) { | 2544 | if (maptype == COMMIT_PWMAP || maptype == COMMIT_WMAP) { |
2545 | if (maplock->flag & mlckFREEXADLIST) { | 2545 | if (maplock->flag & mlckFREEXADLIST) { |
2546 | xadlistlock = (struct xdlistlock *) maplock; | 2546 | xadlistlock = (struct xdlistlock *) maplock; |
2547 | xad = xadlistlock->xdlist; | 2547 | xad = xadlistlock->xdlist; |
2548 | for (n = 0; n < xadlistlock->count; n++, xad++) { | 2548 | for (n = 0; n < xadlistlock->count; n++, xad++) { |
2549 | xaddr = addressXAD(xad); | 2549 | xaddr = addressXAD(xad); |
2550 | xlen = lengthXAD(xad); | 2550 | xlen = lengthXAD(xad); |
2551 | dbFree(ip, xaddr, (s64) xlen); | 2551 | dbFree(ip, xaddr, (s64) xlen); |
2552 | xad->flag = 0; | 2552 | xad->flag = 0; |
2553 | jfs_info("freeWMap: xaddr:0x%lx xlen:%d", | 2553 | jfs_info("freeWMap: xaddr:0x%lx xlen:%d", |
2554 | (ulong) xaddr, xlen); | 2554 | (ulong) xaddr, xlen); |
2555 | } | 2555 | } |
2556 | } else if (maplock->flag & mlckFREEPXD) { | 2556 | } else if (maplock->flag & mlckFREEPXD) { |
2557 | pxdlock = (struct pxd_lock *) maplock; | 2557 | pxdlock = (struct pxd_lock *) maplock; |
2558 | xaddr = addressPXD(&pxdlock->pxd); | 2558 | xaddr = addressPXD(&pxdlock->pxd); |
2559 | xlen = lengthPXD(&pxdlock->pxd); | 2559 | xlen = lengthPXD(&pxdlock->pxd); |
2560 | dbFree(ip, xaddr, (s64) xlen); | 2560 | dbFree(ip, xaddr, (s64) xlen); |
2561 | jfs_info("freeWMap: xaddr:0x%lx xlen:%d", | 2561 | jfs_info("freeWMap: xaddr:0x%lx xlen:%d", |
2562 | (ulong) xaddr, xlen); | 2562 | (ulong) xaddr, xlen); |
2563 | } else { /* (maplock->flag & mlckFREEPXDLIST) */ | 2563 | } else { /* (maplock->flag & mlckFREEPXDLIST) */ |
2564 | 2564 | ||
2565 | pxdlistlock = (struct xdlistlock *) maplock; | 2565 | pxdlistlock = (struct xdlistlock *) maplock; |
2566 | pxd = pxdlistlock->xdlist; | 2566 | pxd = pxdlistlock->xdlist; |
2567 | for (n = 0; n < pxdlistlock->count; n++, pxd++) { | 2567 | for (n = 0; n < pxdlistlock->count; n++, pxd++) { |
2568 | xaddr = addressPXD(pxd); | 2568 | xaddr = addressPXD(pxd); |
2569 | xlen = lengthPXD(pxd); | 2569 | xlen = lengthPXD(pxd); |
2570 | dbFree(ip, xaddr, (s64) xlen); | 2570 | dbFree(ip, xaddr, (s64) xlen); |
2571 | jfs_info("freeWMap: xaddr:0x%lx xlen:%d", | 2571 | jfs_info("freeWMap: xaddr:0x%lx xlen:%d", |
2572 | (ulong) xaddr, xlen); | 2572 | (ulong) xaddr, xlen); |
2573 | } | 2573 | } |
2574 | } | 2574 | } |
2575 | } | 2575 | } |
2576 | } | 2576 | } |
2577 | 2577 | ||
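One subtlety in txFreeMap()'s persistent-map path: extents still flagged XAD_NEW were allocated in the working map but never committed, so there is no pmap state to undo, and the !(xad->flag & XAD_NEW) test skips them. A self-contained illustration of that filter, with hypothetical stand-in types:

        #include <stdio.h>

        #define XAD_NEW_ 0x01   /* hypothetical stand-in for XAD_NEW */

        struct fake_xad { unsigned flag; long addr, len; };

        static void free_pmap_extents(const struct fake_xad *xad, int count)
        {
                for (int n = 0; n < count; n++, xad++) {
                        if (xad->flag & XAD_NEW_)
                                continue;  /* never committed: no pmap state */
                        printf("pmap free: addr=%ld len=%ld\n",
                               xad->addr, xad->len);
                }
        }

        int main(void)
        {
                const struct fake_xad list[] = {
                        { 0,        100, 8 },  /* committed: freed from pmap */
                        { XAD_NEW_, 200, 4 },  /* uncommitted: skipped */
                };
                free_pmap_extents(list, 2);
                return 0;
        }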
2578 | /* | 2578 | /* |
2579 | * txFreelock() | 2579 | * txFreelock() |
2580 | * | 2580 | * |
2581 | * function: remove tlock from inode anonymous locklist | 2581 | * function: remove tlock from inode anonymous locklist |
2582 | */ | 2582 | */ |
2583 | void txFreelock(struct inode *ip) | 2583 | void txFreelock(struct inode *ip) |
2584 | { | 2584 | { |
2585 | struct jfs_inode_info *jfs_ip = JFS_IP(ip); | 2585 | struct jfs_inode_info *jfs_ip = JFS_IP(ip); |
2586 | struct tlock *xtlck, *tlck; | 2586 | struct tlock *xtlck, *tlck; |
2587 | lid_t xlid = 0, lid; | 2587 | lid_t xlid = 0, lid; |
2588 | 2588 | ||
2589 | if (!jfs_ip->atlhead) | 2589 | if (!jfs_ip->atlhead) |
2590 | return; | 2590 | return; |
2591 | 2591 | ||
2592 | TXN_LOCK(); | 2592 | TXN_LOCK(); |
2593 | xtlck = (struct tlock *) &jfs_ip->atlhead; | 2593 | xtlck = (struct tlock *) &jfs_ip->atlhead; |
2594 | 2594 | ||
2595 | while ((lid = xtlck->next) != 0) { | 2595 | while ((lid = xtlck->next) != 0) { |
2596 | tlck = lid_to_tlock(lid); | 2596 | tlck = lid_to_tlock(lid); |
2597 | if (tlck->flag & tlckFREELOCK) { | 2597 | if (tlck->flag & tlckFREELOCK) { |
2598 | xtlck->next = tlck->next; | 2598 | xtlck->next = tlck->next; |
2599 | txLockFree(lid); | 2599 | txLockFree(lid); |
2600 | } else { | 2600 | } else { |
2601 | xtlck = tlck; | 2601 | xtlck = tlck; |
2602 | xlid = lid; | 2602 | xlid = lid; |
2603 | } | 2603 | } |
2604 | } | 2604 | } |
2605 | 2605 | ||
2606 | if (jfs_ip->atlhead) | 2606 | if (jfs_ip->atlhead) |
2607 | jfs_ip->atltail = xlid; | 2607 | jfs_ip->atltail = xlid; |
2608 | else { | 2608 | else { |
2609 | jfs_ip->atltail = 0; | 2609 | jfs_ip->atltail = 0; |
2610 | /* | 2610 | /* |
2611 | * If inode was on anon_list, remove it | 2611 | * If inode was on anon_list, remove it |
2612 | */ | 2612 | */ |
2613 | list_del_init(&jfs_ip->anon_inode_list); | 2613 | list_del_init(&jfs_ip->anon_inode_list); |
2614 | } | 2614 | } |
2615 | TXN_UNLOCK(); | 2615 | TXN_UNLOCK(); |
2616 | } | 2616 | } |
2617 | 2617 | ||
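txFreelock() removes nodes from a singly linked lid list with no head special case by casting &jfs_ip->atlhead to a struct tlock *, which works only because a tlock's next field sits at offset zero. The textbook pointer-to-pointer form of the same idiom, as a runnable sketch:

        #include <stdio.h>

        struct node { struct node *next; int val; };

        /* Unlink every node with val == 0; no head special case, because
         * pp aliases either the head pointer or a node's next field.
         */
        static void prune(struct node **pp)
        {
                while (*pp) {
                        if ((*pp)->val == 0)
                                *pp = (*pp)->next;      /* unlink */
                        else
                                pp = &(*pp)->next;
                }
        }

        int main(void)
        {
                struct node c = { NULL, 3 }, b = { &c, 0 }, a = { &b, 0 };
                struct node *head = &a;

                prune(&head);
                for (struct node *n = head; n; n = n->next)
                        printf("%d\n", n->val);         /* prints: 3 */
                return 0;
        }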
2618 | /* | 2618 | /* |
2619 | * txAbort() | 2619 | * txAbort() |
2620 | * | 2620 | * |
2621 | * function: abort tx before commit; | 2621 | * function: abort tx before commit; |
2622 | * | 2622 | * |
2623 | * frees line-locks and segment locks for all | 2623 | * frees line-locks and segment locks for all |
2624 | * segments in comdata structure. | 2624 | * segments in comdata structure. |
2625 | * Optionally sets state of file-system to FM_DIRTY in super-block. | 2625 | * Optionally sets state of file-system to FM_DIRTY in super-block. |
2626 | * log ages of in-memory page-frames held by the caller | 2626 | * log ages of in-memory page-frames held by the caller |
2627 | * are reset to 0 (to avoid log wrap). | 2627 | * are reset to 0 (to avoid log wrap). |
2628 | */ | 2628 | */ |
2629 | void txAbort(tid_t tid, int dirty) | 2629 | void txAbort(tid_t tid, int dirty) |
2630 | { | 2630 | { |
2631 | lid_t lid, next; | 2631 | lid_t lid, next; |
2632 | struct metapage *mp; | 2632 | struct metapage *mp; |
2633 | struct tblock *tblk = tid_to_tblock(tid); | 2633 | struct tblock *tblk = tid_to_tblock(tid); |
2634 | struct tlock *tlck; | 2634 | struct tlock *tlck; |
2635 | 2635 | ||
2636 | /* | 2636 | /* |
2637 | * free tlocks of the transaction | 2637 | * free tlocks of the transaction |
2638 | */ | 2638 | */ |
2639 | for (lid = tblk->next; lid; lid = next) { | 2639 | for (lid = tblk->next; lid; lid = next) { |
2640 | tlck = lid_to_tlock(lid); | 2640 | tlck = lid_to_tlock(lid); |
2641 | next = tlck->next; | 2641 | next = tlck->next; |
2642 | mp = tlck->mp; | 2642 | mp = tlck->mp; |
2643 | JFS_IP(tlck->ip)->xtlid = 0; | 2643 | JFS_IP(tlck->ip)->xtlid = 0; |
2644 | 2644 | ||
2645 | if (mp) { | 2645 | if (mp) { |
2646 | mp->lid = 0; | 2646 | mp->lid = 0; |
2647 | 2647 | ||
2648 | /* | 2648 | /* |
2649 | * reset lsn of page to avoid logwrap: | 2649 | * reset lsn of page to avoid logwrap: |
2650 | * | 2650 | * |
2651 | * (page may have been previously committed by another | 2651 | * (page may have been previously committed by another |
2652 | * transaction(s) but has not been paged, i.e., | 2652 | * transaction(s) but has not been paged, i.e., |
2653 | * it may be on logsync list even though it has not | 2653 | * it may be on logsync list even though it has not |
2654 | * been logged for the current tx.) | 2654 | * been logged for the current tx.) |
2655 | */ | 2655 | */ |
2656 | if (mp->xflag & COMMIT_PAGE && mp->lsn) | 2656 | if (mp->xflag & COMMIT_PAGE && mp->lsn) |
2657 | LogSyncRelease(mp); | 2657 | LogSyncRelease(mp); |
2658 | } | 2658 | } |
2659 | /* insert tlock at head of freelist */ | 2659 | /* insert tlock at head of freelist */ |
2660 | TXN_LOCK(); | 2660 | TXN_LOCK(); |
2661 | txLockFree(lid); | 2661 | txLockFree(lid); |
2662 | TXN_UNLOCK(); | 2662 | TXN_UNLOCK(); |
2663 | } | 2663 | } |
2664 | 2664 | ||
2665 | /* caller will free the transaction block */ | 2665 | /* caller will free the transaction block */ |
2666 | 2666 | ||
2667 | tblk->next = tblk->last = 0; | 2667 | tblk->next = tblk->last = 0; |
2668 | 2668 | ||
2669 | /* | 2669 | /* |
2670 | * mark filesystem dirty | 2670 | * mark filesystem dirty |
2671 | */ | 2671 | */ |
2672 | if (dirty) | 2672 | if (dirty) |
2673 | jfs_error(tblk->sb, "txAbort"); | 2673 | jfs_error(tblk->sb, "txAbort"); |
2674 | 2674 | ||
2675 | return; | 2675 | return; |
2676 | } | 2676 | } |
2677 | 2677 | ||
2678 | /* | 2678 | /* |
2679 | * txLazyCommit(void) | 2679 | * txLazyCommit(void) |
2680 | * | 2680 | * |
2681 | * All transactions except those changing ipimap (COMMIT_FORCE) are | 2681 | * All transactions except those changing ipimap (COMMIT_FORCE) are |
2682 | * processed by this routine. This ensures that the inode and block | 2682 | * processed by this routine. This ensures that the inode and block |
2683 | * allocation maps are updated in order. For synchronous transactions, | 2683 | * allocation maps are updated in order. For synchronous transactions, |
2684 | * let the user thread finish processing after txUpdateMap() is called. | 2684 | * let the user thread finish processing after txUpdateMap() is called. |
2685 | */ | 2685 | */ |
2686 | static void txLazyCommit(struct tblock * tblk) | 2686 | static void txLazyCommit(struct tblock * tblk) |
2687 | { | 2687 | { |
2688 | struct jfs_log *log; | 2688 | struct jfs_log *log; |
2689 | 2689 | ||
2690 | while (((tblk->flag & tblkGC_READY) == 0) && | 2690 | while (((tblk->flag & tblkGC_READY) == 0) && |
2691 | ((tblk->flag & tblkGC_UNLOCKED) == 0)) { | 2691 | ((tblk->flag & tblkGC_UNLOCKED) == 0)) { |
2692 | /* We must have gotten ahead of the user thread | 2692 | /* We must have gotten ahead of the user thread |
2693 | */ | 2693 | */ |
2694 | jfs_info("jfs_lazycommit: tblk 0x%p not unlocked", tblk); | 2694 | jfs_info("jfs_lazycommit: tblk 0x%p not unlocked", tblk); |
2695 | yield(); | 2695 | yield(); |
2696 | } | 2696 | } |
2697 | 2697 | ||
2698 | jfs_info("txLazyCommit: processing tblk 0x%p", tblk); | 2698 | jfs_info("txLazyCommit: processing tblk 0x%p", tblk); |
2699 | 2699 | ||
2700 | txUpdateMap(tblk); | 2700 | txUpdateMap(tblk); |
2701 | 2701 | ||
2702 | log = (struct jfs_log *) JFS_SBI(tblk->sb)->log; | 2702 | log = (struct jfs_log *) JFS_SBI(tblk->sb)->log; |
2703 | 2703 | ||
2704 | spin_lock_irq(&log->gclock); // LOGGC_LOCK | 2704 | spin_lock_irq(&log->gclock); // LOGGC_LOCK |
2705 | 2705 | ||
2706 | tblk->flag |= tblkGC_COMMITTED; | 2706 | tblk->flag |= tblkGC_COMMITTED; |
2707 | 2707 | ||
2708 | if (tblk->flag & tblkGC_READY) | 2708 | if (tblk->flag & tblkGC_READY) |
2709 | log->gcrtc--; | 2709 | log->gcrtc--; |
2710 | 2710 | ||
2711 | wake_up_all(&tblk->gcwait); // LOGGC_WAKEUP | 2711 | wake_up_all(&tblk->gcwait); // LOGGC_WAKEUP |
2712 | 2712 | ||
2713 | /* | 2713 | /* |
2714 | * Can't release log->gclock until we've tested tblk->flag | 2714 | * Can't release log->gclock until we've tested tblk->flag |
2715 | */ | 2715 | */ |
2716 | if (tblk->flag & tblkGC_LAZY) { | 2716 | if (tblk->flag & tblkGC_LAZY) { |
2717 | spin_unlock_irq(&log->gclock); // LOGGC_UNLOCK | 2717 | spin_unlock_irq(&log->gclock); // LOGGC_UNLOCK |
2718 | txUnlock(tblk); | 2718 | txUnlock(tblk); |
2719 | tblk->flag &= ~tblkGC_LAZY; | 2719 | tblk->flag &= ~tblkGC_LAZY; |
2720 | txEnd(tblk - TxBlock); /* Convert back to tid */ | 2720 | txEnd(tblk - TxBlock); /* Convert back to tid */ |
2721 | } else | 2721 | } else |
2722 | spin_unlock_irq(&log->gclock); // LOGGC_UNLOCK | 2722 | spin_unlock_irq(&log->gclock); // LOGGC_UNLOCK |
2723 | 2723 | ||
2724 | jfs_info("txLazyCommit: done: tblk = 0x%p", tblk); | 2724 | jfs_info("txLazyCommit: done: tblk = 0x%p", tblk); |
2725 | } | 2725 | } |
2726 | 2726 | ||
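The "Can't release log->gclock until we've tested tblk->flag" comment is the crux of txLazyCommit(): once tblkGC_COMMITTED is set and the waiters are woken, a non-lazy tblk may be freed by the user thread the instant the lock drops, so the lazy/non-lazy decision must be read while the lock is still held. A userspace sketch of the same test-before-unlock pattern (toy types, pthreads; illustrative only):

        #include <pthread.h>
        #include <stdio.h>

        #define GC_COMMITTED 1
        #define GC_LAZY      2

        struct txblk {
                pthread_mutex_t lock;   /* stands in for log->gclock */
                pthread_cond_t done;    /* stands in for tblk->gcwait */
                int flags;
        };

        /* Mark the commit done and decide, under the lock, whether we
         * still own the block: if it is not lazy, the woken waiter may
         * free it the moment we unlock, so we must not touch it after.
         */
        static void commit_done(struct txblk *t)
        {
                int lazy;

                pthread_mutex_lock(&t->lock);
                t->flags |= GC_COMMITTED;
                pthread_cond_broadcast(&t->done);
                lazy = t->flags & GC_LAZY;      /* test before unlock */
                pthread_mutex_unlock(&t->lock);

                if (lazy)
                        printf("lazy tx: commit thread finishes teardown\n");
                else
                        printf("sync tx: hands off, waiter owns the block\n");
        }

        int main(void)
        {
                struct txblk t = { PTHREAD_MUTEX_INITIALIZER,
                                   PTHREAD_COND_INITIALIZER, GC_LAZY };
                commit_done(&t);
                return 0;
        }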
2727 | /* | 2727 | /* |
2728 | * jfs_lazycommit(void) | 2728 | * jfs_lazycommit(void) |
2729 | * | 2729 | * |
2730 | * To be run as a kernel daemon. Since lbmIODone may run in interrupt | 2730 | * To be run as a kernel daemon. Since lbmIODone may run in interrupt |
2731 | * context, or where blocking is not wanted, committed transactions are | 2731 | * context, or where blocking is not wanted, committed transactions are |
2732 | * left on the unlock queue for this routine to process. | 2732 | * left on the unlock queue for this routine to process. |
2733 | */ | 2733 | */ |
2734 | int jfs_lazycommit(void *arg) | 2734 | int jfs_lazycommit(void *arg) |
2735 | { | 2735 | { |
2736 | int WorkDone; | 2736 | int WorkDone; |
2737 | struct tblock *tblk; | 2737 | struct tblock *tblk; |
2738 | unsigned long flags; | 2738 | unsigned long flags; |
2739 | struct jfs_sb_info *sbi; | 2739 | struct jfs_sb_info *sbi; |
2740 | 2740 | ||
2741 | daemonize("jfsCommit"); | 2741 | daemonize("jfsCommit"); |
2742 | 2742 | ||
2743 | complete(&jfsIOwait); | 2743 | complete(&jfsIOwait); |
2744 | 2744 | ||
2745 | do { | 2745 | do { |
2746 | LAZY_LOCK(flags); | 2746 | LAZY_LOCK(flags); |
2747 | jfs_commit_thread_waking = 0; /* OK to wake another thread */ | 2747 | jfs_commit_thread_waking = 0; /* OK to wake another thread */ |
2748 | while (!list_empty(&TxAnchor.unlock_queue)) { | 2748 | while (!list_empty(&TxAnchor.unlock_queue)) { |
2749 | WorkDone = 0; | 2749 | WorkDone = 0; |
2750 | list_for_each_entry(tblk, &TxAnchor.unlock_queue, | 2750 | list_for_each_entry(tblk, &TxAnchor.unlock_queue, |
2751 | cqueue) { | 2751 | cqueue) { |
2752 | 2752 | ||
2753 | sbi = JFS_SBI(tblk->sb); | 2753 | sbi = JFS_SBI(tblk->sb); |
2754 | /* | 2754 | /* |
2755 | * For each volume, the transactions must be | 2755 | * For each volume, the transactions must be |
2756 | * handled in order. If another commit thread | 2756 | * handled in order. If another commit thread |
2757 | * is handling a tblk for this superblock, | 2757 | * is handling a tblk for this superblock, |
2758 | * skip it | 2758 | * skip it |
2759 | */ | 2759 | */ |
2760 | if (sbi->commit_state & IN_LAZYCOMMIT) | 2760 | if (sbi->commit_state & IN_LAZYCOMMIT) |
2761 | continue; | 2761 | continue; |
2762 | 2762 | ||
2763 | sbi->commit_state |= IN_LAZYCOMMIT; | 2763 | sbi->commit_state |= IN_LAZYCOMMIT; |
2764 | WorkDone = 1; | 2764 | WorkDone = 1; |
2765 | 2765 | ||
2766 | /* | 2766 | /* |
2767 | * Remove transaction from queue | 2767 | * Remove transaction from queue |
2768 | */ | 2768 | */ |
2769 | list_del(&tblk->cqueue); | 2769 | list_del(&tblk->cqueue); |
2770 | 2770 | ||
2771 | LAZY_UNLOCK(flags); | 2771 | LAZY_UNLOCK(flags); |
2772 | txLazyCommit(tblk); | 2772 | txLazyCommit(tblk); |
2773 | LAZY_LOCK(flags); | 2773 | LAZY_LOCK(flags); |
2774 | 2774 | ||
2775 | sbi->commit_state &= ~IN_LAZYCOMMIT; | 2775 | sbi->commit_state &= ~IN_LAZYCOMMIT; |
2776 | /* | 2776 | /* |
2777 | * Don't continue in the for loop. (We can't | 2777 | * Don't continue in the for loop. (We can't |
2778 | * anyway, it's unsafe!) We want to go back to | 2778 | * anyway, it's unsafe!) We want to go back to |
2779 | * the beginning of the list. | 2779 | * the beginning of the list. |
2780 | */ | 2780 | */ |
2781 | break; | 2781 | break; |
2782 | } | 2782 | } |
2783 | 2783 | ||
2784 | /* If there was nothing to do, don't continue */ | 2784 | /* If there was nothing to do, don't continue */ |
2785 | if (!WorkDone) | 2785 | if (!WorkDone) |
2786 | break; | 2786 | break; |
2787 | } | 2787 | } |
2788 | /* In case a wakeup came while all threads were active */ | 2788 | /* In case a wakeup came while all threads were active */ |
2789 | jfs_commit_thread_waking = 0; | 2789 | jfs_commit_thread_waking = 0; |
2790 | 2790 | ||
2791 | if (freezing(current)) { | 2791 | if (freezing(current)) { |
2792 | LAZY_UNLOCK(flags); | 2792 | LAZY_UNLOCK(flags); |
2793 | refrigerator(); | 2793 | refrigerator(); |
2794 | } else { | 2794 | } else { |
2795 | DECLARE_WAITQUEUE(wq, current); | 2795 | DECLARE_WAITQUEUE(wq, current); |
2796 | 2796 | ||
2797 | add_wait_queue(&jfs_commit_thread_wait, &wq); | 2797 | add_wait_queue(&jfs_commit_thread_wait, &wq); |
2798 | set_current_state(TASK_INTERRUPTIBLE); | 2798 | set_current_state(TASK_INTERRUPTIBLE); |
2799 | LAZY_UNLOCK(flags); | 2799 | LAZY_UNLOCK(flags); |
2800 | schedule(); | 2800 | schedule(); |
2801 | current->state = TASK_RUNNING; | 2801 | current->state = TASK_RUNNING; |
2802 | remove_wait_queue(&jfs_commit_thread_wait, &wq); | 2802 | remove_wait_queue(&jfs_commit_thread_wait, &wq); |
2803 | } | 2803 | } |
2804 | } while (!jfs_stop_threads); | 2804 | } while (!jfs_stop_threads); |
2805 | 2805 | ||
2806 | if (!list_empty(&TxAnchor.unlock_queue)) | 2806 | if (!list_empty(&TxAnchor.unlock_queue)) |
2807 | jfs_err("jfs_lazycommit being killed w/pending transactions!"); | 2807 | jfs_err("jfs_lazycommit being killed w/pending transactions!"); |
2808 | else | 2808 | else |
2809 | jfs_info("jfs_lazycommit being killed\n"); | 2809 | jfs_info("jfs_lazycommit being killed\n"); |
2810 | complete_and_exit(&jfsIOwait, 0); | 2810 | complete_and_exit(&jfsIOwait, 0); |
2811 | } | 2811 | } |
2812 | 2812 | ||
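Two constraints shape the inner loop of jfs_lazycommit(): transactions for one volume must complete in order, which the IN_LAZYCOMMIT bit in sbi->commit_state enforces, and because LAZY_LOCK is dropped around txLazyCommit() the list iterator becomes invalid, so the loop breaks and rescans from the head. A single-threaded toy model of that scan, commit, rescan shape (illustrative only, not kernel code):

        #include <stdio.h>

        struct tx { int vol; int done; };

        int main(void)
        {
                struct tx q[] = { { 0, 0 }, { 1, 0 }, { 0, 0 } };
                int busy[2] = { 0, 0 };         /* per-volume IN_LAZYCOMMIT */
                int progress = 1;

                while (progress) {
                        progress = 0;
                        for (int i = 0; i < 3; i++) {   /* scan from head */
                                if (q[i].done || busy[q[i].vol])
                                        continue;
                                busy[q[i].vol] = 1;     /* serialize volume */
                                /* lock is dropped here in the real code */
                                printf("commit tx %d on volume %d\n",
                                       i, q[i].vol);
                                q[i].done = 1;
                                busy[q[i].vol] = 0;
                                progress = 1;
                                break;  /* iterator unsafe: rescan from head */
                        }
                }
                return 0;
        }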
2813 | void txLazyUnlock(struct tblock * tblk) | 2813 | void txLazyUnlock(struct tblock * tblk) |
2814 | { | 2814 | { |
2815 | unsigned long flags; | 2815 | unsigned long flags; |
2816 | 2816 | ||
2817 | LAZY_LOCK(flags); | 2817 | LAZY_LOCK(flags); |
2818 | 2818 | ||
2819 | list_add_tail(&tblk->cqueue, &TxAnchor.unlock_queue); | 2819 | list_add_tail(&tblk->cqueue, &TxAnchor.unlock_queue); |
2820 | /* | 2820 | /* |
2821 | * Don't wake up a commit thread if there is already one servicing | 2821 | * Don't wake up a commit thread if there is already one servicing |
2822 | * this superblock, or if the last one we woke up hasn't started yet. | 2822 | * this superblock, or if the last one we woke up hasn't started yet. |
2823 | */ | 2823 | */ |
2824 | if (!(JFS_SBI(tblk->sb)->commit_state & IN_LAZYCOMMIT) && | 2824 | if (!(JFS_SBI(tblk->sb)->commit_state & IN_LAZYCOMMIT) && |
2825 | !jfs_commit_thread_waking) { | 2825 | !jfs_commit_thread_waking) { |
2826 | jfs_commit_thread_waking = 1; | 2826 | jfs_commit_thread_waking = 1; |
2827 | wake_up(&jfs_commit_thread_wait); | 2827 | wake_up(&jfs_commit_thread_wait); |
2828 | } | 2828 | } |
2829 | LAZY_UNLOCK(flags); | 2829 | LAZY_UNLOCK(flags); |
2830 | } | 2830 | } |
2831 | 2831 | ||
2832 | static void LogSyncRelease(struct metapage * mp) | 2832 | static void LogSyncRelease(struct metapage * mp) |
2833 | { | 2833 | { |
2834 | struct jfs_log *log = mp->log; | 2834 | struct jfs_log *log = mp->log; |
2835 | 2835 | ||
2836 | assert(mp->nohomeok); | 2836 | assert(mp->nohomeok); |
2837 | assert(log); | 2837 | assert(log); |
2838 | metapage_homeok(mp); | 2838 | metapage_homeok(mp); |
2839 | } | 2839 | } |
2840 | 2840 | ||
2841 | /* | 2841 | /* |
2842 | * txQuiesce | 2842 | * txQuiesce |
2843 | * | 2843 | * |
2844 | * Block all new transactions and push anonymous transactions to | 2844 | * Block all new transactions and push anonymous transactions to |
2845 | * completion | 2845 | * completion |
2846 | * | 2846 | * |
2847 | * This does almost the same thing as jfs_sync below. We don't | 2847 | * This does almost the same thing as jfs_sync below. We don't |
2848 | * worry about deadlocking when jfs_tlocks_low is set, since we would | 2848 | * worry about deadlocking when jfs_tlocks_low is set, since we would |
2849 | * expect jfs_sync to get us out of that jam. | 2849 | * expect jfs_sync to get us out of that jam. |
2850 | */ | 2850 | */ |
2851 | void txQuiesce(struct super_block *sb) | 2851 | void txQuiesce(struct super_block *sb) |
2852 | { | 2852 | { |
2853 | struct inode *ip; | 2853 | struct inode *ip; |
2854 | struct jfs_inode_info *jfs_ip; | 2854 | struct jfs_inode_info *jfs_ip; |
2855 | struct jfs_log *log = JFS_SBI(sb)->log; | 2855 | struct jfs_log *log = JFS_SBI(sb)->log; |
2856 | tid_t tid; | 2856 | tid_t tid; |
2857 | 2857 | ||
2858 | set_bit(log_QUIESCE, &log->flag); | 2858 | set_bit(log_QUIESCE, &log->flag); |
2859 | 2859 | ||
2860 | TXN_LOCK(); | 2860 | TXN_LOCK(); |
2861 | restart: | 2861 | restart: |
2862 | while (!list_empty(&TxAnchor.anon_list)) { | 2862 | while (!list_empty(&TxAnchor.anon_list)) { |
2863 | jfs_ip = list_entry(TxAnchor.anon_list.next, | 2863 | jfs_ip = list_entry(TxAnchor.anon_list.next, |
2864 | struct jfs_inode_info, | 2864 | struct jfs_inode_info, |
2865 | anon_inode_list); | 2865 | anon_inode_list); |
2866 | ip = &jfs_ip->vfs_inode; | 2866 | ip = &jfs_ip->vfs_inode; |
2867 | 2867 | ||
2868 | /* | 2868 | /* |
2869 | * inode will be removed from anonymous list | 2869 | * inode will be removed from anonymous list |
2870 | * when it is committed | 2870 | * when it is committed |
2871 | */ | 2871 | */ |
2872 | TXN_UNLOCK(); | 2872 | TXN_UNLOCK(); |
2873 | tid = txBegin(ip->i_sb, COMMIT_INODE | COMMIT_FORCE); | 2873 | tid = txBegin(ip->i_sb, COMMIT_INODE | COMMIT_FORCE); |
2874 | down(&jfs_ip->commit_sem); | 2874 | down(&jfs_ip->commit_sem); |
2875 | txCommit(tid, 1, &ip, 0); | 2875 | txCommit(tid, 1, &ip, 0); |
2876 | txEnd(tid); | 2876 | txEnd(tid); |
2877 | up(&jfs_ip->commit_sem); | 2877 | up(&jfs_ip->commit_sem); |
2878 | /* | 2878 | /* |
2879 | * Just to be safe. I don't know how | 2879 | * Just to be safe. I don't know how |
2880 | * long we can run without blocking | 2880 | * long we can run without blocking |
2881 | */ | 2881 | */ |
2882 | cond_resched(); | 2882 | cond_resched(); |
2883 | TXN_LOCK(); | 2883 | TXN_LOCK(); |
2884 | } | 2884 | } |
2885 | 2885 | ||
2886 | /* | 2886 | /* |
2887 | * If jfs_sync is running in parallel, there could be some inodes | 2887 | * If jfs_sync is running in parallel, there could be some inodes |
2888 | * on anon_list2. Let's check. | 2888 | * on anon_list2. Let's check. |
2889 | */ | 2889 | */ |
2890 | if (!list_empty(&TxAnchor.anon_list2)) { | 2890 | if (!list_empty(&TxAnchor.anon_list2)) { |
2891 | list_splice(&TxAnchor.anon_list2, &TxAnchor.anon_list); | 2891 | list_splice(&TxAnchor.anon_list2, &TxAnchor.anon_list); |
2892 | INIT_LIST_HEAD(&TxAnchor.anon_list2); | 2892 | INIT_LIST_HEAD(&TxAnchor.anon_list2); |
2893 | goto restart; | 2893 | goto restart; |
2894 | } | 2894 | } |
2895 | TXN_UNLOCK(); | 2895 | TXN_UNLOCK(); |
2896 | 2896 | ||
2897 | /* | 2897 | /* |
2898 | * We may need to kick off the group commit | 2898 | * We may need to kick off the group commit |
2899 | */ | 2899 | */ |
2900 | jfs_flush_journal(log, 0); | 2900 | jfs_flush_journal(log, 0); |
2901 | } | 2901 | } |
2902 | 2902 | ||
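txQuiesce() and txResume() are meant to bracket work that needs a stable on-disk image: quiesce sets log_QUIESCE to block new transactions, drives every anonymous transaction to commit, and flushes the journal; resume clears the bit and wakes anything parked on log->syncwait. The expected pairing, condensed from the two functions:

        txQuiesce(sb);  /* block new txs, drain anon_list, flush journal */
        /* ... filesystem is stable on disk, e.g. while it is snapshotted ... */
        txResume(sb);   /* clear log_QUIESCE, wake blocked transactions */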
2903 | /* | 2903 | /* |
2904 | * txResume() | 2904 | * txResume() |
2905 | * | 2905 | * |
2906 | * Allows transactions to start again following txQuiesce | 2906 | * Allows transactions to start again following txQuiesce |
2907 | */ | 2907 | */ |
2908 | void txResume(struct super_block *sb) | 2908 | void txResume(struct super_block *sb) |
2909 | { | 2909 | { |
2910 | struct jfs_log *log = JFS_SBI(sb)->log; | 2910 | struct jfs_log *log = JFS_SBI(sb)->log; |
2911 | 2911 | ||
2912 | clear_bit(log_QUIESCE, &log->flag); | 2912 | clear_bit(log_QUIESCE, &log->flag); |
2913 | TXN_WAKEUP(&log->syncwait); | 2913 | TXN_WAKEUP(&log->syncwait); |
2914 | } | 2914 | } |
2915 | 2915 | ||
2916 | /* | 2916 | /* |
2917 | * jfs_sync(void) | 2917 | * jfs_sync(void) |
2918 | * | 2918 | * |
2919 | * To be run as a kernel daemon. This is awakened when tlocks run low. | 2919 | * To be run as a kernel daemon. This is awakened when tlocks run low. |
2920 | * We write any inodes that have anonymous tlocks so they will become | 2920 | * We write any inodes that have anonymous tlocks so they will become |
2921 | * available. | 2921 | * available. |
2922 | */ | 2922 | */ |
2923 | int jfs_sync(void *arg) | 2923 | int jfs_sync(void *arg) |
2924 | { | 2924 | { |
2925 | struct inode *ip; | 2925 | struct inode *ip; |
2926 | struct jfs_inode_info *jfs_ip; | 2926 | struct jfs_inode_info *jfs_ip; |
2927 | int rc; | 2927 | int rc; |
2928 | tid_t tid; | 2928 | tid_t tid; |
2929 | 2929 | ||
2930 | daemonize("jfsSync"); | 2930 | daemonize("jfsSync"); |
2931 | 2931 | ||
2932 | complete(&jfsIOwait); | 2932 | complete(&jfsIOwait); |
2933 | 2933 | ||
2934 | do { | 2934 | do { |
2935 | /* | 2935 | /* |
2936 | * write each inode on the anonymous inode list | 2936 | * write each inode on the anonymous inode list |
2937 | */ | 2937 | */ |
2938 | TXN_LOCK(); | 2938 | TXN_LOCK(); |
2939 | while (jfs_tlocks_low && !list_empty(&TxAnchor.anon_list)) { | 2939 | while (jfs_tlocks_low && !list_empty(&TxAnchor.anon_list)) { |
2940 | jfs_ip = list_entry(TxAnchor.anon_list.next, | 2940 | jfs_ip = list_entry(TxAnchor.anon_list.next, |
2941 | struct jfs_inode_info, | 2941 | struct jfs_inode_info, |
2942 | anon_inode_list); | 2942 | anon_inode_list); |
2943 | ip = &jfs_ip->vfs_inode; | 2943 | ip = &jfs_ip->vfs_inode; |
2944 | 2944 | ||
2945 | if (! igrab(ip)) { | 2945 | if (! igrab(ip)) { |
2946 | /* | 2946 | /* |
2947 | * Inode is being freed | 2947 | * Inode is being freed |
2948 | */ | 2948 | */ |
2949 | list_del_init(&jfs_ip->anon_inode_list); | 2949 | list_del_init(&jfs_ip->anon_inode_list); |
2950 | } else if (! down_trylock(&jfs_ip->commit_sem)) { | 2950 | } else if (! down_trylock(&jfs_ip->commit_sem)) { |
2951 | /* | 2951 | /* |
2952 | * inode will be removed from anonymous list | 2952 | * inode will be removed from anonymous list |
2953 | * when it is committed | 2953 | * when it is committed |
2954 | */ | 2954 | */ |
2955 | TXN_UNLOCK(); | 2955 | TXN_UNLOCK(); |
2956 | tid = txBegin(ip->i_sb, COMMIT_INODE); | 2956 | tid = txBegin(ip->i_sb, COMMIT_INODE); |
2957 | rc = txCommit(tid, 1, &ip, 0); | 2957 | rc = txCommit(tid, 1, &ip, 0); |
2958 | txEnd(tid); | 2958 | txEnd(tid); |
2959 | up(&jfs_ip->commit_sem); | 2959 | up(&jfs_ip->commit_sem); |
2960 | 2960 | ||
2961 | iput(ip); | 2961 | iput(ip); |
2962 | /* | 2962 | /* |
2963 | * Just to be safe. I don't know how | 2963 | * Just to be safe. I don't know how |
2964 | * long we can run without blocking | 2964 | * long we can run without blocking |
2965 | */ | 2965 | */ |
2966 | cond_resched(); | 2966 | cond_resched(); |
2967 | TXN_LOCK(); | 2967 | TXN_LOCK(); |
2968 | } else { | 2968 | } else { |
2969 | /* We can't get the commit semaphore. It may | 2969 | /* We can't get the commit semaphore. It may |
2970 | * be held by a thread waiting for tlock's | 2970 | * be held by a thread waiting for tlock's |
2971 | * so let's not block here. Save it to | 2971 | * so let's not block here. Save it to |
2972 | * put back on the anon_list. | 2972 | * put back on the anon_list. |
2973 | */ | 2973 | */ |
2974 | 2974 | ||
2975 | /* Take off anon_list */ | 2975 | /* Take off anon_list */ |
2976 | list_del(&jfs_ip->anon_inode_list); | 2976 | list_del(&jfs_ip->anon_inode_list); |
2977 | 2977 | ||
2978 | /* Put on anon_list2 */ | 2978 | /* Put on anon_list2 */ |
2979 | list_add(&jfs_ip->anon_inode_list, | 2979 | list_add(&jfs_ip->anon_inode_list, |
2980 | &TxAnchor.anon_list2); | 2980 | &TxAnchor.anon_list2); |
2981 | 2981 | ||
2982 | TXN_UNLOCK(); | 2982 | TXN_UNLOCK(); |
2983 | iput(ip); | 2983 | iput(ip); |
2984 | TXN_LOCK(); | 2984 | TXN_LOCK(); |
2985 | } | 2985 | } |
2986 | } | 2986 | } |
2987 | /* Add anon_list2 back to anon_list */ | 2987 | /* Add anon_list2 back to anon_list */ |
2988 | list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list); | 2988 | list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list); |
2989 | 2989 | ||
2990 | if (freezing(current)) { | 2990 | if (freezing(current)) { |
2991 | TXN_UNLOCK(); | 2991 | TXN_UNLOCK(); |
2992 | refrigerator(); | 2992 | refrigerator(); |
2993 | } else { | 2993 | } else { |
2994 | DECLARE_WAITQUEUE(wq, current); | 2994 | DECLARE_WAITQUEUE(wq, current); |
2995 | 2995 | ||
2996 | add_wait_queue(&jfs_sync_thread_wait, &wq); | 2996 | add_wait_queue(&jfs_sync_thread_wait, &wq); |
2997 | set_current_state(TASK_INTERRUPTIBLE); | 2997 | set_current_state(TASK_INTERRUPTIBLE); |
2998 | TXN_UNLOCK(); | 2998 | TXN_UNLOCK(); |
2999 | schedule(); | 2999 | schedule(); |
3000 | current->state = TASK_RUNNING; | 3000 | current->state = TASK_RUNNING; |
3001 | remove_wait_queue(&jfs_sync_thread_wait, &wq); | 3001 | remove_wait_queue(&jfs_sync_thread_wait, &wq); |
3002 | } | 3002 | } |
3003 | } while (!jfs_stop_threads); | 3003 | } while (!jfs_stop_threads); |
3004 | 3004 | ||
3005 | jfs_info("jfs_sync being killed"); | 3005 | jfs_info("jfs_sync being killed"); |
3006 | complete_and_exit(&jfsIOwait, 0); | 3006 | complete_and_exit(&jfsIOwait, 0); |
3007 | } | 3007 | } |
3008 | 3008 | ||
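jfs_sync() must never sleep on commit_sem: the holder may itself be blocked waiting for tlocks that only jfs_sync can release, so blocking would deadlock. Hence the down_trylock(), with busy inodes parked on anon_list2 and spliced back afterwards. A toy model of that trylock-or-defer decision (hypothetical types, illustrative only):

        #include <stdio.h>

        struct ino { const char *name; int sem_held; };

        int main(void)
        {
                struct ino list[] = { { "inode A", 0 }, { "inode B", 1 } };

                for (int i = 0; i < 2; i++) {
                        if (!list[i].sem_held)  /* down_trylock succeeds */
                                printf("%s: commit anonymous tlocks\n",
                                       list[i].name);
                        else                    /* holder may be waiting on
                                                 * tlocks; do not block */
                                printf("%s: defer to anon_list2\n",
                                       list[i].name);
                }
                return 0;
        }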
3009 | #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_DEBUG) | 3009 | #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_DEBUG) |
3010 | int jfs_txanchor_read(char *buffer, char **start, off_t offset, int length, | 3010 | int jfs_txanchor_read(char *buffer, char **start, off_t offset, int length, |
3011 | int *eof, void *data) | 3011 | int *eof, void *data) |
3012 | { | 3012 | { |
3013 | int len = 0; | 3013 | int len = 0; |
3014 | off_t begin; | 3014 | off_t begin; |
3015 | char *freewait; | 3015 | char *freewait; |
3016 | char *freelockwait; | 3016 | char *freelockwait; |
3017 | char *lowlockwait; | 3017 | char *lowlockwait; |
3018 | 3018 | ||
3019 | freewait = | 3019 | freewait = |
3020 | waitqueue_active(&TxAnchor.freewait) ? "active" : "empty"; | 3020 | waitqueue_active(&TxAnchor.freewait) ? "active" : "empty"; |
3021 | freelockwait = | 3021 | freelockwait = |
3022 | waitqueue_active(&TxAnchor.freelockwait) ? "active" : "empty"; | 3022 | waitqueue_active(&TxAnchor.freelockwait) ? "active" : "empty"; |
3023 | lowlockwait = | 3023 | lowlockwait = |
3024 | waitqueue_active(&TxAnchor.lowlockwait) ? "active" : "empty"; | 3024 | waitqueue_active(&TxAnchor.lowlockwait) ? "active" : "empty"; |
3025 | 3025 | ||
3026 | len += sprintf(buffer, | 3026 | len += sprintf(buffer, |
3027 | "JFS TxAnchor\n" | 3027 | "JFS TxAnchor\n" |
3028 | "============\n" | 3028 | "============\n" |
3029 | "freetid = %d\n" | 3029 | "freetid = %d\n" |
3030 | "freewait = %s\n" | 3030 | "freewait = %s\n" |
3031 | "freelock = %d\n" | 3031 | "freelock = %d\n" |
3032 | "freelockwait = %s\n" | 3032 | "freelockwait = %s\n" |
3033 | "lowlockwait = %s\n" | 3033 | "lowlockwait = %s\n" |
3034 | "tlocksInUse = %d\n" | 3034 | "tlocksInUse = %d\n" |
3035 | "jfs_tlocks_low = %d\n" | 3035 | "jfs_tlocks_low = %d\n" |
3036 | "unlock_queue is %sempty\n", | 3036 | "unlock_queue is %sempty\n", |
3037 | TxAnchor.freetid, | 3037 | TxAnchor.freetid, |
3038 | freewait, | 3038 | freewait, |
3039 | TxAnchor.freelock, | 3039 | TxAnchor.freelock, |
3040 | freelockwait, | 3040 | freelockwait, |
3041 | lowlockwait, | 3041 | lowlockwait, |
3042 | TxAnchor.tlocksInUse, | 3042 | TxAnchor.tlocksInUse, |
3043 | jfs_tlocks_low, | 3043 | jfs_tlocks_low, |
3044 | list_empty(&TxAnchor.unlock_queue) ? "" : "not "); | 3044 | list_empty(&TxAnchor.unlock_queue) ? "" : "not "); |
3045 | 3045 | ||
3046 | begin = offset; | 3046 | begin = offset; |
3047 | *start = buffer + begin; | 3047 | *start = buffer + begin; |
3048 | len -= begin; | 3048 | len -= begin; |
3049 | 3049 | ||
3050 | if (len > length) | 3050 | if (len > length) |
3051 | len = length; | 3051 | len = length; |
3052 | else | 3052 | else |
3053 | *eof = 1; | 3053 | *eof = 1; |
3054 | 3054 | ||
3055 | if (len < 0) | 3055 | if (len < 0) |
3056 | len = 0; | 3056 | len = 0; |
3057 | 3057 | ||
3058 | return len; | 3058 | return len; |
3059 | } | 3059 | } |
3060 | #endif | 3060 | #endif |
3061 | 3061 | ||
3062 | #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_STATISTICS) | 3062 | #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_STATISTICS) |
3063 | int jfs_txstats_read(char *buffer, char **start, off_t offset, int length, | 3063 | int jfs_txstats_read(char *buffer, char **start, off_t offset, int length, |
3064 | int *eof, void *data) | 3064 | int *eof, void *data) |
3065 | { | 3065 | { |
3066 | int len = 0; | 3066 | int len = 0; |
3067 | off_t begin; | 3067 | off_t begin; |
3068 | 3068 | ||
3069 | len += sprintf(buffer, | 3069 | len += sprintf(buffer, |
3070 | "JFS TxStats\n" | 3070 | "JFS TxStats\n" |
3071 | "===========\n" | 3071 | "===========\n" |
3072 | "calls to txBegin = %d\n" | 3072 | "calls to txBegin = %d\n" |
3073 | "txBegin blocked by sync barrier = %d\n" | 3073 | "txBegin blocked by sync barrier = %d\n" |
3074 | "txBegin blocked by tlocks low = %d\n" | 3074 | "txBegin blocked by tlocks low = %d\n" |
3075 | "txBegin blocked by no free tid = %d\n" | 3075 | "txBegin blocked by no free tid = %d\n" |
3076 | "calls to txBeginAnon = %d\n" | 3076 | "calls to txBeginAnon = %d\n" |
3077 | "txBeginAnon blocked by sync barrier = %d\n" | 3077 | "txBeginAnon blocked by sync barrier = %d\n" |
3078 | "txBeginAnon blocked by tlocks low = %d\n" | 3078 | "txBeginAnon blocked by tlocks low = %d\n" |
3079 | "calls to txLockAlloc = %d\n" | 3079 | "calls to txLockAlloc = %d\n" |
3080 | "tLockAlloc blocked by no free lock = %d\n", | 3080 | "tLockAlloc blocked by no free lock = %d\n", |
3081 | TxStat.txBegin, | 3081 | TxStat.txBegin, |
3082 | TxStat.txBegin_barrier, | 3082 | TxStat.txBegin_barrier, |
3083 | TxStat.txBegin_lockslow, | 3083 | TxStat.txBegin_lockslow, |
3084 | TxStat.txBegin_freetid, | 3084 | TxStat.txBegin_freetid, |
3085 | TxStat.txBeginAnon, | 3085 | TxStat.txBeginAnon, |
3086 | TxStat.txBeginAnon_barrier, | 3086 | TxStat.txBeginAnon_barrier, |
3087 | TxStat.txBeginAnon_lockslow, | 3087 | TxStat.txBeginAnon_lockslow, |
3088 | TxStat.txLockAlloc, | 3088 | TxStat.txLockAlloc, |
3089 | TxStat.txLockAlloc_freelock); | 3089 | TxStat.txLockAlloc_freelock); |
3090 | 3090 | ||
3091 | begin = offset; | 3091 | begin = offset; |
3092 | *start = buffer + begin; | 3092 | *start = buffer + begin; |
3093 | len -= begin; | 3093 | len -= begin; |
3094 | 3094 | ||
3095 | if (len > length) | 3095 | if (len > length) |
3096 | len = length; | 3096 | len = length; |
3097 | else | 3097 | else |
3098 | *eof = 1; | 3098 | *eof = 1; |
3099 | 3099 | ||
3100 | if (len < 0) | 3100 | if (len < 0) |
3101 | len = 0; | 3101 | len = 0; |
fs/jfs/super.c
1 | /* | 1 | /* |
2 | * Copyright (C) International Business Machines Corp., 2000-2004 | 2 | * Copyright (C) International Business Machines Corp., 2000-2004 |
3 | * Portions Copyright (C) Christoph Hellwig, 2001-2002 | 3 | * Portions Copyright (C) Christoph Hellwig, 2001-2002 |
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by | 6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or | 7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. | 8 | * (at your option) any later version. |
9 | * | 9 | * |
10 | * This program is distributed in the hope that it will be useful, | 10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
13 | * the GNU General Public License for more details. | 13 | * the GNU General Public License for more details. |
14 | * | 14 | * |
15 | * You should have received a copy of the GNU General Public License | 15 | * You should have received a copy of the GNU General Public License |
16 | * along with this program; if not, write to the Free Software | 16 | * along with this program; if not, write to the Free Software |
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
18 | */ | 18 | */ |
19 | 19 | ||
20 | #include <linux/fs.h> | 20 | #include <linux/fs.h> |
21 | #include <linux/config.h> | 21 | #include <linux/config.h> |
22 | #include <linux/module.h> | 22 | #include <linux/module.h> |
23 | #include <linux/parser.h> | 23 | #include <linux/parser.h> |
24 | #include <linux/completion.h> | 24 | #include <linux/completion.h> |
25 | #include <linux/vfs.h> | 25 | #include <linux/vfs.h> |
26 | #include <linux/moduleparam.h> | 26 | #include <linux/moduleparam.h> |
27 | #include <linux/posix_acl.h> | 27 | #include <linux/posix_acl.h> |
28 | #include <asm/uaccess.h> | 28 | #include <asm/uaccess.h> |
29 | 29 | ||
30 | #include "jfs_incore.h" | 30 | #include "jfs_incore.h" |
31 | #include "jfs_filsys.h" | 31 | #include "jfs_filsys.h" |
32 | #include "jfs_inode.h" | 32 | #include "jfs_inode.h" |
33 | #include "jfs_metapage.h" | 33 | #include "jfs_metapage.h" |
34 | #include "jfs_superblock.h" | 34 | #include "jfs_superblock.h" |
35 | #include "jfs_dmap.h" | 35 | #include "jfs_dmap.h" |
36 | #include "jfs_imap.h" | 36 | #include "jfs_imap.h" |
37 | #include "jfs_acl.h" | 37 | #include "jfs_acl.h" |
38 | #include "jfs_debug.h" | 38 | #include "jfs_debug.h" |
39 | 39 | ||
40 | MODULE_DESCRIPTION("The Journaled Filesystem (JFS)"); | 40 | MODULE_DESCRIPTION("The Journaled Filesystem (JFS)"); |
41 | MODULE_AUTHOR("Steve Best/Dave Kleikamp/Barry Arndt, IBM"); | 41 | MODULE_AUTHOR("Steve Best/Dave Kleikamp/Barry Arndt, IBM"); |
42 | MODULE_LICENSE("GPL"); | 42 | MODULE_LICENSE("GPL"); |
43 | 43 | ||
44 | static kmem_cache_t * jfs_inode_cachep; | 44 | static kmem_cache_t * jfs_inode_cachep; |
45 | 45 | ||
46 | static struct super_operations jfs_super_operations; | 46 | static struct super_operations jfs_super_operations; |
47 | static struct export_operations jfs_export_operations; | 47 | static struct export_operations jfs_export_operations; |
48 | static struct file_system_type jfs_fs_type; | 48 | static struct file_system_type jfs_fs_type; |
49 | 49 | ||
50 | #define MAX_COMMIT_THREADS 64 | 50 | #define MAX_COMMIT_THREADS 64 |
51 | static int commit_threads = 0; | 51 | static int commit_threads = 0; |
52 | module_param(commit_threads, int, 0); | 52 | module_param(commit_threads, int, 0); |
53 | MODULE_PARM_DESC(commit_threads, "Number of commit threads"); | 53 | MODULE_PARM_DESC(commit_threads, "Number of commit threads"); |
54 | 54 | ||
55 | int jfs_stop_threads; | 55 | int jfs_stop_threads; |
56 | static pid_t jfsIOthread; | 56 | static pid_t jfsIOthread; |
57 | static pid_t jfsCommitThread[MAX_COMMIT_THREADS]; | 57 | static pid_t jfsCommitThread[MAX_COMMIT_THREADS]; |
58 | static pid_t jfsSyncThread; | 58 | static pid_t jfsSyncThread; |
59 | DECLARE_COMPLETION(jfsIOwait); | 59 | DECLARE_COMPLETION(jfsIOwait); |
60 | 60 | ||
61 | #ifdef CONFIG_JFS_DEBUG | 61 | #ifdef CONFIG_JFS_DEBUG |
62 | int jfsloglevel = JFS_LOGLEVEL_WARN; | 62 | int jfsloglevel = JFS_LOGLEVEL_WARN; |
63 | module_param(jfsloglevel, int, 0644); | 63 | module_param(jfsloglevel, int, 0644); |
64 | MODULE_PARM_DESC(jfsloglevel, "Specify JFS loglevel (0, 1 or 2)"); | 64 | MODULE_PARM_DESC(jfsloglevel, "Specify JFS loglevel (0, 1 or 2)"); |
65 | #endif | 65 | #endif |
66 | 66 | ||
67 | static void jfs_handle_error(struct super_block *sb) | 67 | static void jfs_handle_error(struct super_block *sb) |
68 | { | 68 | { |
69 | struct jfs_sb_info *sbi = JFS_SBI(sb); | 69 | struct jfs_sb_info *sbi = JFS_SBI(sb); |
70 | 70 | ||
71 | if (sb->s_flags & MS_RDONLY) | 71 | if (sb->s_flags & MS_RDONLY) |
72 | return; | 72 | return; |
73 | 73 | ||
74 | updateSuper(sb, FM_DIRTY); | 74 | updateSuper(sb, FM_DIRTY); |
75 | 75 | ||
76 | if (sbi->flag & JFS_ERR_PANIC) | 76 | if (sbi->flag & JFS_ERR_PANIC) |
77 | panic("JFS (device %s): panic forced after error\n", | 77 | panic("JFS (device %s): panic forced after error\n", |
78 | sb->s_id); | 78 | sb->s_id); |
79 | else if (sbi->flag & JFS_ERR_REMOUNT_RO) { | 79 | else if (sbi->flag & JFS_ERR_REMOUNT_RO) { |
80 | jfs_err("ERROR: (device %s): remounting filesystem " | 80 | jfs_err("ERROR: (device %s): remounting filesystem " |
81 | "as read-only\n", | 81 | "as read-only\n", |
82 | sb->s_id); | 82 | sb->s_id); |
83 | sb->s_flags |= MS_RDONLY; | 83 | sb->s_flags |= MS_RDONLY; |
84 | } | 84 | } |
85 | 85 | ||
86 | /* nothing is done for errors=continue beyond marking the superblock dirty */ | 86 | /* nothing is done for errors=continue beyond marking the superblock dirty */ |
87 | } | 87 | } |
88 | 88 | ||
89 | void jfs_error(struct super_block *sb, const char * function, ...) | 89 | void jfs_error(struct super_block *sb, const char * function, ...) |
90 | { | 90 | { |
91 | static char error_buf[256]; | 91 | static char error_buf[256]; |
92 | va_list args; | 92 | va_list args; |
93 | 93 | ||
94 | va_start(args, function); | 94 | va_start(args, function); |
95 | vsprintf(error_buf, function, args); | 95 | vsprintf(error_buf, function, args); |
96 | va_end(args); | 96 | va_end(args); |
97 | 97 | ||
98 | printk(KERN_ERR "ERROR: (device %s): %s\n", sb->s_id, error_buf); | 98 | printk(KERN_ERR "ERROR: (device %s): %s\n", sb->s_id, error_buf); |
99 | 99 | ||
100 | jfs_handle_error(sb); | 100 | jfs_handle_error(sb); |
101 | } | 101 | } |
102 | 102 | ||
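A hardening note on jfs_error(): it expands the caller-supplied format with vsprintf() into a fixed 256-byte static buffer, so a long message overruns error_buf, and concurrent callers race on the shared static. Bounding the expansion is a one-line change (a sketch, not part of this commit; the race would still need a lock or a stack buffer):

        va_start(args, function);
        vsnprintf(error_buf, sizeof(error_buf), function, args); /* bounded */
        va_end(args);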
103 | static struct inode *jfs_alloc_inode(struct super_block *sb) | 103 | static struct inode *jfs_alloc_inode(struct super_block *sb) |
104 | { | 104 | { |
105 | struct jfs_inode_info *jfs_inode; | 105 | struct jfs_inode_info *jfs_inode; |
106 | 106 | ||
107 | jfs_inode = kmem_cache_alloc(jfs_inode_cachep, GFP_NOFS); | 107 | jfs_inode = kmem_cache_alloc(jfs_inode_cachep, GFP_NOFS); |
108 | if (!jfs_inode) | 108 | if (!jfs_inode) |
109 | return NULL; | 109 | return NULL; |
110 | return &jfs_inode->vfs_inode; | 110 | return &jfs_inode->vfs_inode; |
111 | } | 111 | } |
112 | 112 | ||
113 | static void jfs_destroy_inode(struct inode *inode) | 113 | static void jfs_destroy_inode(struct inode *inode) |
114 | { | 114 | { |
115 | struct jfs_inode_info *ji = JFS_IP(inode); | 115 | struct jfs_inode_info *ji = JFS_IP(inode); |
116 | 116 | ||
117 | spin_lock_irq(&ji->ag_lock); | 117 | spin_lock_irq(&ji->ag_lock); |
118 | if (ji->active_ag != -1) { | 118 | if (ji->active_ag != -1) { |
119 | struct bmap *bmap = JFS_SBI(inode->i_sb)->bmap; | 119 | struct bmap *bmap = JFS_SBI(inode->i_sb)->bmap; |
120 | atomic_dec(&bmap->db_active[ji->active_ag]); | 120 | atomic_dec(&bmap->db_active[ji->active_ag]); |
121 | ji->active_ag = -1; | 121 | ji->active_ag = -1; |
122 | } | 122 | } |
123 | spin_unlock_irq(&ji->ag_lock); | 123 | spin_unlock_irq(&ji->ag_lock); |
124 | 124 | ||
125 | #ifdef CONFIG_JFS_POSIX_ACL | 125 | #ifdef CONFIG_JFS_POSIX_ACL |
126 | if (ji->i_acl != JFS_ACL_NOT_CACHED) { | 126 | if (ji->i_acl != JFS_ACL_NOT_CACHED) { |
127 | posix_acl_release(ji->i_acl); | 127 | posix_acl_release(ji->i_acl); |
128 | ji->i_acl = JFS_ACL_NOT_CACHED; | 128 | ji->i_acl = JFS_ACL_NOT_CACHED; |
129 | } | 129 | } |
130 | if (ji->i_default_acl != JFS_ACL_NOT_CACHED) { | 130 | if (ji->i_default_acl != JFS_ACL_NOT_CACHED) { |
131 | posix_acl_release(ji->i_default_acl); | 131 | posix_acl_release(ji->i_default_acl); |
132 | ji->i_default_acl = JFS_ACL_NOT_CACHED; | 132 | ji->i_default_acl = JFS_ACL_NOT_CACHED; |
133 | } | 133 | } |
134 | #endif | 134 | #endif |
135 | 135 | ||
136 | kmem_cache_free(jfs_inode_cachep, ji); | 136 | kmem_cache_free(jfs_inode_cachep, ji); |
137 | } | 137 | } |
138 | 138 | ||
139 | static int jfs_statfs(struct super_block *sb, struct kstatfs *buf) | 139 | static int jfs_statfs(struct super_block *sb, struct kstatfs *buf) |
140 | { | 140 | { |
141 | struct jfs_sb_info *sbi = JFS_SBI(sb); | 141 | struct jfs_sb_info *sbi = JFS_SBI(sb); |
142 | s64 maxinodes; | 142 | s64 maxinodes; |
143 | struct inomap *imap = JFS_IP(sbi->ipimap)->i_imap; | 143 | struct inomap *imap = JFS_IP(sbi->ipimap)->i_imap; |
144 | 144 | ||
145 | jfs_info("In jfs_statfs"); | 145 | jfs_info("In jfs_statfs"); |
146 | buf->f_type = JFS_SUPER_MAGIC; | 146 | buf->f_type = JFS_SUPER_MAGIC; |
147 | buf->f_bsize = sbi->bsize; | 147 | buf->f_bsize = sbi->bsize; |
148 | buf->f_blocks = sbi->bmap->db_mapsize; | 148 | buf->f_blocks = sbi->bmap->db_mapsize; |
149 | buf->f_bfree = sbi->bmap->db_nfree; | 149 | buf->f_bfree = sbi->bmap->db_nfree; |
150 | buf->f_bavail = sbi->bmap->db_nfree; | 150 | buf->f_bavail = sbi->bmap->db_nfree; |
151 | /* | 151 | /* |
152 | * If we really return the number of allocated & free inodes, some | 152 | * If we really return the number of allocated & free inodes, some |
153 | * applications will fail because they won't see enough free inodes. | 153 | * applications will fail because they won't see enough free inodes. |
154 | * We'll try to calculate a reasonable guess as to how many inodes we can | 154 | * We'll try to calculate a reasonable guess as to how many inodes we can |
155 | * really allocate | 155 | * really allocate |
156 | * | 156 | * |
157 | * buf->f_files = atomic_read(&imap->im_numinos); | 157 | * buf->f_files = atomic_read(&imap->im_numinos); |
158 | * buf->f_ffree = atomic_read(&imap->im_numfree); | 158 | * buf->f_ffree = atomic_read(&imap->im_numfree); |
159 | */ | 159 | */ |
160 | maxinodes = min((s64) atomic_read(&imap->im_numinos) + | 160 | maxinodes = min((s64) atomic_read(&imap->im_numinos) + |
161 | ((sbi->bmap->db_nfree >> imap->im_l2nbperiext) | 161 | ((sbi->bmap->db_nfree >> imap->im_l2nbperiext) |
162 | << L2INOSPEREXT), (s64) 0xffffffffLL); | 162 | << L2INOSPEREXT), (s64) 0xffffffffLL); |
163 | buf->f_files = maxinodes; | 163 | buf->f_files = maxinodes; |
164 | buf->f_ffree = maxinodes - (atomic_read(&imap->im_numinos) - | 164 | buf->f_ffree = maxinodes - (atomic_read(&imap->im_numinos) - |
165 | atomic_read(&imap->im_numfree)); | 165 | atomic_read(&imap->im_numfree)); |
166 | 166 | ||
167 | buf->f_namelen = JFS_NAME_MAX; | 167 | buf->f_namelen = JFS_NAME_MAX; |
168 | return 0; | 168 | return 0; |
169 | } | 169 | } |
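The f_files / f_ffree values above are deliberately an estimate: JFS allocates inodes dynamically, 2^L2INOSPEREXT per inode extent, so jfs_statfs() projects how many additional inodes the remaining free blocks could still back, capped at 32 bits. A standalone sketch of the same arithmetic, with illustrative constants (none of these numbers come from a real filesystem):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Illustrative values only */
	int64_t im_numinos = 100000;	/* inodes already backed by disk */
	int64_t im_numfree = 2000;	/* of those, currently free */
	int64_t db_nfree = 1 << 20;	/* free filesystem blocks */
	int l2nbperiext = 2;		/* log2(blocks per inode extent), assumed */
	int l2inosperext = 5;		/* log2(inodes per extent): 32 inodes */

	/* Existing inodes plus what the free blocks could still yield,
	 * capped at 32 bits as in jfs_statfs() */
	int64_t maxinodes = im_numinos +
		((db_nfree >> l2nbperiext) << l2inosperext);
	if (maxinodes > 0xffffffffLL)
		maxinodes = 0xffffffffLL;

	printf("f_files = %lld\n", (long long) maxinodes);
	printf("f_ffree = %lld\n",
	       (long long) (maxinodes - (im_numinos - im_numfree)));
	return 0;
}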

static void jfs_put_super(struct super_block *sb)
{
	struct jfs_sb_info *sbi = JFS_SBI(sb);
	int rc;

	jfs_info("In jfs_put_super");
	rc = jfs_umount(sb);
	if (rc)
		jfs_err("jfs_umount failed with return code %d", rc);
	if (sbi->nls_tab)
		unload_nls(sbi->nls_tab);
	sbi->nls_tab = NULL;

	truncate_inode_pages(sbi->direct_inode->i_mapping, 0);
	iput(sbi->direct_inode);
	sbi->direct_inode = NULL;

	kfree(sbi);
}

enum {
	Opt_integrity, Opt_nointegrity, Opt_iocharset, Opt_resize,
	Opt_resize_nosize, Opt_errors, Opt_ignore, Opt_err,
};

static match_table_t tokens = {
	{Opt_integrity, "integrity"},
	{Opt_nointegrity, "nointegrity"},
	{Opt_iocharset, "iocharset=%s"},
	{Opt_resize, "resize=%u"},
	{Opt_resize_nosize, "resize"},
	{Opt_errors, "errors=%s"},
	{Opt_ignore, "noquota"},
	{Opt_ignore, "quota"},
	{Opt_ignore, "usrquota"},
	{Opt_ignore, "grpquota"},
	{Opt_err, NULL}
};

static int parse_options(char *options, struct super_block *sb, s64 *newLVSize,
			 int *flag)
{
	void *nls_map = (void *)-1;	/* -1: no change;  NULL: none */
	char *p;
	struct jfs_sb_info *sbi = JFS_SBI(sb);

	*newLVSize = 0;

	if (!options)
		return 1;

	while ((p = strsep(&options, ",")) != NULL) {
		substring_t args[MAX_OPT_ARGS];
		int token;
		if (!*p)
			continue;

		token = match_token(p, tokens, args);
		switch (token) {
		case Opt_integrity:
			*flag &= ~JFS_NOINTEGRITY;
			break;
		case Opt_nointegrity:
			*flag |= JFS_NOINTEGRITY;
			break;
		case Opt_ignore:
			/* Silently ignore the quota options */
			break;
		case Opt_iocharset:
			if (nls_map && nls_map != (void *) -1)
				unload_nls(nls_map);
			if (!strcmp(args[0].from, "none"))
				nls_map = NULL;
			else {
				nls_map = load_nls(args[0].from);
				if (!nls_map) {
					printk(KERN_ERR
					       "JFS: charset not found\n");
					goto cleanup;
				}
			}
			break;
		case Opt_resize:
		{
			char *resize = args[0].from;
			*newLVSize = simple_strtoull(resize, &resize, 0);
			break;
		}
		case Opt_resize_nosize:
		{
			*newLVSize = sb->s_bdev->bd_inode->i_size >>
				sb->s_blocksize_bits;
			if (*newLVSize == 0)
				printk(KERN_ERR
				       "JFS: Cannot determine volume size\n");
			break;
		}
		case Opt_errors:
		{
			char *errors = args[0].from;
			if (!errors || !*errors)
				goto cleanup;
			if (!strcmp(errors, "continue")) {
				*flag &= ~JFS_ERR_REMOUNT_RO;
				*flag &= ~JFS_ERR_PANIC;
				*flag |= JFS_ERR_CONTINUE;
			} else if (!strcmp(errors, "remount-ro")) {
				*flag &= ~JFS_ERR_CONTINUE;
				*flag &= ~JFS_ERR_PANIC;
				*flag |= JFS_ERR_REMOUNT_RO;
			} else if (!strcmp(errors, "panic")) {
				*flag &= ~JFS_ERR_CONTINUE;
				*flag &= ~JFS_ERR_REMOUNT_RO;
				*flag |= JFS_ERR_PANIC;
			} else {
				printk(KERN_ERR
				       "JFS: %s is an invalid error handler\n",
				       errors);
				goto cleanup;
			}
			break;
		}
		default:
			printk(KERN_ERR
			       "jfs: Unrecognized mount option \"%s\" or missing value\n",
			       p);
			goto cleanup;
		}
	}

	if (nls_map != (void *) -1) {
		/* Discard old (if remount) */
		if (sbi->nls_tab)
			unload_nls(sbi->nls_tab);
		sbi->nls_tab = nls_map;
	}
	return 1;

cleanup:
	if (nls_map && nls_map != (void *) -1)
		unload_nls(nls_map);
	return 0;
}
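parse_options() keys its iocharset handling off a three-state pointer: (void *)-1 means the option never appeared (leave sbi->nls_tab alone), NULL means "iocharset=none", and anything else is a freshly loaded NLS table. A minimal userspace sketch of that sentinel pattern (the helper names here are hypothetical stand-ins for load_nls()/unload_nls()):

#include <stdio.h>
#include <string.h>

#define NO_CHANGE ((void *)-1)	/* option absent: keep current setting */

/* Hypothetical stand-in for load_nls(); just echoes the name back */
static void *load_table(const char *name) { return (void *) name; }

static void apply_charset_option(const char *arg, void **current)
{
	void *map = NO_CHANGE;

	if (arg) {
		if (!strcmp(arg, "none"))
			map = NULL;		/* explicitly no table */
		else
			map = load_table(arg);	/* use the named table */
	}
	if (map != NO_CHANGE)
		*current = map;			/* only touch it if asked */
}

int main(void)
{
	void *tab = NULL;

	apply_charset_option("iso8859-1", &tab);
	printf("table: %s\n", tab ? (char *) tab : "(none)");
	apply_charset_option(NULL, &tab);	/* option absent: unchanged */
	printf("table: %s\n", tab ? (char *) tab : "(none)");
	return 0;
}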

static int jfs_remount(struct super_block *sb, int *flags, char *data)
{
	s64 newLVSize = 0;
	int rc = 0;
	int flag = JFS_SBI(sb)->flag;

	if (!parse_options(data, sb, &newLVSize, &flag))
		return -EINVAL;

	if (newLVSize) {
		if (sb->s_flags & MS_RDONLY) {
			printk(KERN_ERR
			       "JFS: resize requires volume to be mounted read-write\n");
			return -EROFS;
		}
		rc = jfs_extendfs(sb, newLVSize, 0);
		if (rc)
			return rc;
	}

	if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) {
		/*
		 * Invalidate any previously read metadata.  fsck may have
		 * changed the on-disk data since we mounted r/o
		 */
		truncate_inode_pages(JFS_SBI(sb)->direct_inode->i_mapping, 0);

		JFS_SBI(sb)->flag = flag;
		return jfs_mount_rw(sb, 1);
	}
	if ((!(sb->s_flags & MS_RDONLY)) && (*flags & MS_RDONLY)) {
		rc = jfs_umount_rw(sb);
		JFS_SBI(sb)->flag = flag;
		return rc;
	}
	if ((JFS_SBI(sb)->flag & JFS_NOINTEGRITY) != (flag & JFS_NOINTEGRITY))
		if (!(sb->s_flags & MS_RDONLY)) {
			/* Toggling integrity requires a log restart cycle */
			rc = jfs_umount_rw(sb);
			if (rc)
				return rc;
			JFS_SBI(sb)->flag = flag;
			return jfs_mount_rw(sb, 1);
		}
	JFS_SBI(sb)->flag = flag;

	return 0;
}

static int jfs_fill_super(struct super_block *sb, void *data, int silent)
{
	struct jfs_sb_info *sbi;
	struct inode *inode;
	int rc;
	s64 newLVSize = 0;
	int flag;

	jfs_info("In jfs_read_super: s_flags=0x%lx", sb->s_flags);

	if (!new_valid_dev(sb->s_bdev->bd_dev))
		return -EOVERFLOW;

	sbi = kmalloc(sizeof(struct jfs_sb_info), GFP_KERNEL);
	if (!sbi)
		return -ENOMEM;
	memset(sbi, 0, sizeof(struct jfs_sb_info));
	sb->s_fs_info = sbi;
	sbi->sb = sb;

	/* initialize the mount flag and determine the default error handler */
	flag = JFS_ERR_REMOUNT_RO;

	if (!parse_options((char *) data, sb, &newLVSize, &flag)) {
		kfree(sbi);
		return -EINVAL;
	}
	sbi->flag = flag;

#ifdef CONFIG_JFS_POSIX_ACL
	sb->s_flags |= MS_POSIXACL;
#endif

	if (newLVSize) {
		printk(KERN_ERR "resize option for remount only\n");
		goto out_kfree;
	}

	/*
	 * Initialize blocksize to 4K.
	 */
	sb_set_blocksize(sb, PSIZE);

	/*
	 * Set method vectors.
	 */
	sb->s_op = &jfs_super_operations;
	sb->s_export_op = &jfs_export_operations;

	/*
	 * Initialize direct-mapping inode/address-space
	 */
	inode = new_inode(sb);
	if (inode == NULL)
		goto out_kfree;
	inode->i_ino = 0;
	inode->i_nlink = 1;
	inode->i_size = sb->s_bdev->bd_inode->i_size;
	inode->i_mapping->a_ops = &jfs_metapage_aops;
	mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);

	sbi->direct_inode = inode;

	rc = jfs_mount(sb);
	if (rc) {
		if (!silent)
			jfs_err("jfs_mount failed w/return code = %d", rc);
		goto out_mount_failed;
	}
	if (sb->s_flags & MS_RDONLY)
		sbi->log = NULL;
	else {
		rc = jfs_mount_rw(sb, 0);
		if (rc) {
			if (!silent) {
				jfs_err("jfs_mount_rw failed, return code = %d",
					rc);
			}
			goto out_no_rw;
		}
	}

	sb->s_magic = JFS_SUPER_MAGIC;

	inode = iget(sb, ROOT_I);
	if (!inode || is_bad_inode(inode))
		goto out_no_root;
	sb->s_root = d_alloc_root(inode);
	if (!sb->s_root)
		goto out_no_root;

	if (sbi->mntflag & JFS_OS2)
		sb->s_root->d_op = &jfs_ci_dentry_operations;

	/* logical blocks are represented by 40 bits in pxd_t, etc. */
	sb->s_maxbytes = ((u64) sb->s_blocksize) << 40;
#if BITS_PER_LONG == 32
	/*
	 * Page cache is indexed by long.
	 * I would use MAX_LFS_FILESIZE, but it's only half as big
	 */
	sb->s_maxbytes = min(((u64) PAGE_CACHE_SIZE << 32) - 1, sb->s_maxbytes);
#endif
	sb->s_time_gran = 1;
	return 0;

out_no_root:
	jfs_err("jfs_read_super: get root inode failed");
	if (inode)
		iput(inode);

out_no_rw:
	rc = jfs_umount(sb);
	if (rc)
		jfs_err("jfs_umount failed with return code %d", rc);
out_mount_failed:
	filemap_fdatawrite(sbi->direct_inode->i_mapping);
	filemap_fdatawait(sbi->direct_inode->i_mapping);
	truncate_inode_pages(sbi->direct_inode->i_mapping, 0);
	make_bad_inode(sbi->direct_inode);
	iput(sbi->direct_inode);
	sbi->direct_inode = NULL;
out_kfree:
	if (sbi->nls_tab)
		unload_nls(sbi->nls_tab);
	kfree(sbi);
	return -EINVAL;
}
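The s_maxbytes value set above falls straight out of the on-disk format: a pxd_t carries a 40-bit logical block address, so the largest addressable offset is blocksize << 40, and 32-bit kernels are further capped by the page-cache index. A quick standalone check of those numbers (the 4K PSIZE and 4K page size are assumed defaults):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t blocksize = 4096;		/* PSIZE: JFS default 4K blocks */
	uint64_t maxbytes = blocksize << 40;	/* 40-bit block address in pxd_t */

	/* 4096 * 2^40 = 2^52 bytes = 4 PiB */
	printf("max file size: %llu bytes (2^52)\n",
	       (unsigned long long) maxbytes);

	/* On 32-bit kernels the page cache index (an unsigned long) caps
	 * this at (PAGE_CACHE_SIZE << 32) - 1, i.e. 16 TiB - 1 for 4K pages */
	uint64_t pagecache_cap = ((uint64_t) 4096 << 32) - 1;
	printf("32-bit cap: %llu bytes\n", (unsigned long long) pagecache_cap);
	return 0;
}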

static void jfs_write_super_lockfs(struct super_block *sb)
{
	struct jfs_sb_info *sbi = JFS_SBI(sb);
	struct jfs_log *log = sbi->log;

	if (!(sb->s_flags & MS_RDONLY)) {
		txQuiesce(sb);
		lmLogShutdown(log);
		updateSuper(sb, FM_CLEAN);
	}
}

static void jfs_unlockfs(struct super_block *sb)
{
	struct jfs_sb_info *sbi = JFS_SBI(sb);
	struct jfs_log *log = sbi->log;
	int rc = 0;

	if (!(sb->s_flags & MS_RDONLY)) {
		updateSuper(sb, FM_MOUNT);
		if ((rc = lmLogInit(log)))
			jfs_err("jfs_unlock failed with return code %d", rc);
		else
			txResume(sb);
	}
}
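These two hooks are the freeze/thaw pair the VFS invokes around block-device snapshots: lockfs quiesces new transactions, shuts the journal down, and stamps the superblock FM_CLEAN so an image taken while frozen needs no log replay; unlockfs undoes the same steps in the opposite order. A toy sketch of that symmetry (the helpers below are illustrative stand-ins, not kernel APIs):

#include <stdio.h>

static void freeze_fs(void)
{
	puts("1. block new transactions");	/* txQuiesce(sb) */
	puts("2. flush and stop the journal");	/* lmLogShutdown(log) */
	puts("3. superblock state: FM_CLEAN");	/* updateSuper(sb, FM_CLEAN) */
}

static void thaw_fs(void)
{
	/* reverse order of freeze_fs() */
	puts("3. superblock state: FM_MOUNT");	/* updateSuper(sb, FM_MOUNT) */
	puts("2. restart the journal");		/* lmLogInit(log) */
	puts("1. resume transactions");		/* txResume(sb) */
}

int main(void)
{
	freeze_fs();	/* e.g. while an LVM snapshot is taken */
	thaw_fs();
	return 0;
}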

static struct super_block *jfs_get_sb(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data)
{
	return get_sb_bdev(fs_type, flags, dev_name, data, jfs_fill_super);
}

static int jfs_sync_fs(struct super_block *sb, int wait)
{
	struct jfs_log *log = JFS_SBI(sb)->log;

	/* log == NULL indicates read-only mount */
	if (log) {
		jfs_flush_journal(log, wait);
-		jfs_syncpt(log);
+		jfs_syncpt(log, 0);
	}

	return 0;
}
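This call site is where the commit lands in super.c: jfs_syncpt() grows a second argument, and an ordinary sync_fs passes 0, leaving the aggressive behavior to the sync barrier path inside the log manager. A sketch of what the updated helper presumably looks like (signature inferred from this caller and the commit description; LOG_LOCK/LOG_UNLOCK and lmLogSync() are existing jfs_logmgr.c internals):

/*
 * Assumed shape of the updated sync-point helper in jfs_logmgr.c.
 * The hard_sync flag is threaded through to lmLogSync(), which -- per
 * the commit message -- uses it to write hot metadata pages at the
 * point where all committed transactions are on disk but no new
 * transactions have been allowed to proceed.
 */
void jfs_syncpt(struct jfs_log *log, int hard_sync)
{
	LOG_LOCK(log);
	lmLogSync(log, hard_sync);
	LOG_UNLOCK(log);
}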

static struct super_operations jfs_super_operations = {
	.alloc_inode	= jfs_alloc_inode,
	.destroy_inode	= jfs_destroy_inode,
	.read_inode	= jfs_read_inode,
	.dirty_inode	= jfs_dirty_inode,
	.write_inode	= jfs_write_inode,
	.delete_inode	= jfs_delete_inode,
	.put_super	= jfs_put_super,
	.sync_fs	= jfs_sync_fs,
	.write_super_lockfs = jfs_write_super_lockfs,
	.unlockfs	= jfs_unlockfs,
	.statfs		= jfs_statfs,
	.remount_fs	= jfs_remount,
};

static struct export_operations jfs_export_operations = {
	.get_parent	= jfs_get_parent,
};

static struct file_system_type jfs_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "jfs",
	.get_sb		= jfs_get_sb,
	.kill_sb	= kill_block_super,
	.fs_flags	= FS_REQUIRES_DEV,
};

static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
{
	struct jfs_inode_info *jfs_ip = (struct jfs_inode_info *) foo;

	if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) ==
	    SLAB_CTOR_CONSTRUCTOR) {
		memset(jfs_ip, 0, sizeof(struct jfs_inode_info));
		INIT_LIST_HEAD(&jfs_ip->anon_inode_list);
		init_rwsem(&jfs_ip->rdwrlock);
		init_MUTEX(&jfs_ip->commit_sem);
		init_rwsem(&jfs_ip->xattr_sem);
		spin_lock_init(&jfs_ip->ag_lock);
		jfs_ip->active_ag = -1;
#ifdef CONFIG_JFS_POSIX_ACL
		jfs_ip->i_acl = JFS_ACL_NOT_CACHED;
		jfs_ip->i_default_acl = JFS_ACL_NOT_CACHED;
#endif
		inode_init_once(&jfs_ip->vfs_inode);
	}
}

static int __init init_jfs_fs(void)
{
	int i;
	int rc;

	jfs_inode_cachep =
	    kmem_cache_create("jfs_ip", sizeof(struct jfs_inode_info), 0,
			      SLAB_RECLAIM_ACCOUNT, init_once, NULL);
	if (jfs_inode_cachep == NULL)
		return -ENOMEM;

	/*
	 * Metapage initialization
	 */
	rc = metapage_init();
	if (rc) {
		jfs_err("metapage_init failed w/rc = %d", rc);
		goto free_slab;
	}

	/*
	 * Transaction Manager initialization
	 */
	rc = txInit();
	if (rc) {
		jfs_err("txInit failed w/rc = %d", rc);
		goto free_metapage;
	}

	/*
	 * I/O completion thread (endio)
	 */
	jfsIOthread = kernel_thread(jfsIOWait, NULL, CLONE_KERNEL);
	if (jfsIOthread < 0) {
		jfs_err("init_jfs_fs: fork failed w/rc = %d", jfsIOthread);
		rc = jfsIOthread;
		goto end_txmngr;
	}
	wait_for_completion(&jfsIOwait);	/* Wait until thread starts */

	if (commit_threads < 1)
		commit_threads = num_online_cpus();
	if (commit_threads > MAX_COMMIT_THREADS)
		commit_threads = MAX_COMMIT_THREADS;

	for (i = 0; i < commit_threads; i++) {
		jfsCommitThread[i] = kernel_thread(jfs_lazycommit, NULL,
						   CLONE_KERNEL);
		if (jfsCommitThread[i] < 0) {
			jfs_err("init_jfs_fs: fork failed w/rc = %d",
				jfsCommitThread[i]);
			rc = jfsCommitThread[i];
			commit_threads = i;
			goto kill_committask;
		}
		/* Wait until thread starts */
		wait_for_completion(&jfsIOwait);
	}

	jfsSyncThread = kernel_thread(jfs_sync, NULL, CLONE_KERNEL);
	if (jfsSyncThread < 0) {
		jfs_err("init_jfs_fs: fork failed w/rc = %d", jfsSyncThread);
		rc = jfsSyncThread;
		goto kill_committask;
	}
	wait_for_completion(&jfsIOwait);	/* Wait until thread starts */

#ifdef PROC_FS_JFS
	jfs_proc_init();
#endif

	return register_filesystem(&jfs_fs_type);

kill_committask:
	jfs_stop_threads = 1;
	wake_up_all(&jfs_commit_thread_wait);
	for (i = 0; i < commit_threads; i++)
		wait_for_completion(&jfsIOwait);

	wake_up(&jfs_IO_thread_wait);
	wait_for_completion(&jfsIOwait);	/* Wait for thread exit */
end_txmngr:
	txExit();
free_metapage:
	metapage_exit();
free_slab:
	kmem_cache_destroy(jfs_inode_cachep);
	return rc;
}

static void __exit exit_jfs_fs(void)
{
	int i;

	jfs_info("exit_jfs_fs called");

	jfs_stop_threads = 1;
	txExit();
	metapage_exit();
	wake_up(&jfs_IO_thread_wait);
	wait_for_completion(&jfsIOwait);	/* Wait until IO thread exits */
	wake_up_all(&jfs_commit_thread_wait);
	for (i = 0; i < commit_threads; i++)
		wait_for_completion(&jfsIOwait);
	wake_up(&jfs_sync_thread_wait);
	wait_for_completion(&jfsIOwait);	/* Wait until Sync thread exits */
#ifdef PROC_FS_JFS
	jfs_proc_clean();
#endif
	unregister_filesystem(&jfs_fs_type);
	kmem_cache_destroy(jfs_inode_cachep);
}

module_init(init_jfs_fs)
module_exit(exit_jfs_fs)