  // SPDX-License-Identifier: GPL-2.0
  /*
   * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
   * All Rights Reserved.
   */
  #ifndef	__XFS_LOG_PRIV_H__
  #define __XFS_LOG_PRIV_H__
  
  struct xfs_buf;
  struct xlog;
  struct xlog_ticket;
  struct xfs_mount;
  
  /*
   * Flags for log structure
   */
  #define XLOG_ACTIVE_RECOVERY	0x2	/* in the middle of recovery */
  #define	XLOG_RECOVERY_NEEDED	0x4	/* log was recovered */
  #define XLOG_IO_ERROR		0x8	/* log hit an I/O error, and being
  					   shutdown */
  #define XLOG_TAIL_WARN		0x10	/* log tail verify warning issued */
  
  /*
   * get client id from packed copy.
   *
   * this hack is here because the xlog_pack code copies four bytes
   * of xlog_op_header containing the fields oh_clientid, oh_flags
   * and oh_res2 into the packed copy.
   *
   * later on this four byte chunk is treated as an int and the
   * client id is pulled out.
   *
   * this has endian issues, of course.
   */
  static inline uint xlog_get_client_id(__be32 i)
  {
  	return be32_to_cpu(i) >> 24;
  }
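
  /*
   * Illustrative sketch, not part of this header: the packing side that
   * the comment above describes.  oh_clientid is the first byte of the
   * packed four-byte chunk, so once that chunk is read as a big-endian
   * 32 bit value the client id ends up in the top eight bits, which is
   * why xlog_get_client_id() shifts right by 24.  A hypothetical
   * inverse helper would look like:
   *
   *	static inline __be32 xlog_pack_client_id(uint client)
   *	{
   *		return cpu_to_be32(client << 24);
   *	}
   */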

  /*
   * In core log state
   */
  enum xlog_iclog_state {
  	XLOG_STATE_ACTIVE,	/* Current IC log being written to */
  	XLOG_STATE_WANT_SYNC,	/* Want to sync this iclog; no more writes */
  	XLOG_STATE_SYNCING,	/* This IC log is syncing */
  	XLOG_STATE_DONE_SYNC,	/* Done syncing to disk */
  	XLOG_STATE_CALLBACK,	/* Callback functions now */
  	XLOG_STATE_DIRTY,	/* Dirty IC log, not ready for ACTIVE status */
  	XLOG_STATE_IOERROR,	/* IO error happened in sync'ing log */
  };
  
  /*
   * Log ticket flags
   */
  #define XLOG_TIC_PERM_RESERV	0x1	/* permanent reservation */
  
  #define XLOG_TIC_FLAGS \
  	{ XLOG_TIC_PERM_RESERV,	"XLOG_TIC_PERM_RESERV" }

  /*
   * Below are states for covering allocation transactions.
   * By covering, we mean changing the h_tail_lsn in the last on-disk
   * log write such that no allocation transactions will be re-done during
   * recovery after a system crash. Recovery starts at the last on-disk
   * log write.
   *
   * These states are used to insert dummy log entries to cover
   * space allocation transactions which can undo non-transactional changes
   * after a crash. Writes to a file with space
   * already allocated do not result in any transactions. Allocations
   * might include space beyond the EOF. So if we just push the EOF a
   * little, the last transaction for the file could contain the wrong
   * size. If there is no file system activity, after an allocation
   * transaction, and the system crashes, the allocation transaction
   * will get replayed and the file will be truncated. This could
   * be hours/days/... after the allocation occurred.
   *
   * The fix for this is to do two dummy transactions when the
   * system is idle. We need two dummy transactions because the h_tail_lsn
   * in the log record header needs to point beyond the last possible
   * non-dummy transaction. The first dummy changes the h_tail_lsn to
   * the first transaction before the dummy. The second dummy causes
   * h_tail_lsn to point to the first dummy. Recovery starts at h_tail_lsn.
   *
   * These dummy transactions get committed when everything
   * is idle (after there has been some activity).
   *
   * There are 5 states used to control this.
   *
   *  IDLE -- no logging has been done on the file system or
   *		we are done covering previous transactions.
   *  NEED -- logging has occurred and we need a dummy transaction
   *		when the log becomes idle.
   *  DONE -- we were in the NEED state and have committed a dummy
   *		transaction.
   *  NEED2 -- we detected that a dummy transaction has gone to the
   *		on disk log with no other transactions.
   *  DONE2 -- we committed a dummy transaction when in the NEED2 state.
   *
   * There are two places where we switch states:
   *
   * 1.) In xfs_sync, when we detect an idle log and are in NEED or NEED2.
   *	We commit the dummy transaction and switch to DONE or DONE2,
   *	respectively. In all other states, we don't do anything.
   *
   * 2.) When we finish writing the on-disk log (xlog_state_clean_log).
   *
   *	No matter what state we are in, if this isn't the dummy
   *	transaction going out, the next state is NEED.
   *	So, if we aren't in the DONE or DONE2 states, the next state
   *	is NEED. We can't be finishing a write of the dummy record
   *	unless it was committed and the state switched to DONE or DONE2.
   *
   *	If we are in the DONE state and this was a write of the
   *		dummy transaction, we move to NEED2.
   *
   *	If we are in the DONE2 state and this was a write of the
   *		dummy transaction, we move to IDLE.
   *
   *
   * If only one dummy transaction is written, it can end up appended to
   * the same log write as a file space allocation. When this happens, the log
   * recovery code replays the space allocation and a file could be truncated.
   * This is why we have the NEED2 and DONE2 states before going idle.
   */
  
  #define XLOG_STATE_COVER_IDLE	0
  #define XLOG_STATE_COVER_NEED	1
  #define XLOG_STATE_COVER_DONE	2
  #define XLOG_STATE_COVER_NEED2	3
  #define XLOG_STATE_COVER_DONE2	4
  
  #define XLOG_COVER_OPS		5
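
  /*
   * Illustrative sketch, not part of this header: the covering state
   * machine described above, in rough pseudo-code.  The real transitions
   * live in the log covering code in xfs_log.c; the helper names below
   * are hypothetical.
   *
   *	// switch point 1: the log has gone idle
   *	if (state == XLOG_STATE_COVER_NEED || state == XLOG_STATE_COVER_NEED2) {
   *		commit_dummy_transaction();
   *		state = (state == XLOG_STATE_COVER_NEED) ?
   *			XLOG_STATE_COVER_DONE : XLOG_STATE_COVER_DONE2;
   *	}
   *
   *	// switch point 2: an on-disk log write has completed
   *	if (!write_was_the_dummy_record)
   *		state = XLOG_STATE_COVER_NEED;
   *	else if (state == XLOG_STATE_COVER_DONE)
   *		state = XLOG_STATE_COVER_NEED2;
   *	else if (state == XLOG_STATE_COVER_DONE2)
   *		state = XLOG_STATE_COVER_IDLE;
   */
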
  /* Ticket reservation region accounting */ 
  #define XLOG_TIC_LEN_MAX	15
  
  /*
   * Reservation region
   * As would be stored in xfs_log_iovec but without the i_addr which
   * we don't care about.
   */
  typedef struct xlog_res {
  	uint	r_len;	/* region length		:4 */
  	uint	r_type;	/* region's transaction type	:4 */
  } xlog_res_t;

  typedef struct xlog_ticket {
  	struct list_head   t_queue;	 /* reserve/write queue */
  	struct task_struct *t_task;	 /* task that owns this ticket */
  	xlog_tid_t	   t_tid;	 /* transaction identifier	 : 4  */
  	atomic_t	   t_ref;	 /* ticket reference count       : 4  */
  	int		   t_curr_res;	 /* current reservation in bytes : 4  */
  	int		   t_unit_res;	 /* unit reservation in bytes    : 4  */
  	char		   t_ocnt;	 /* original count		 : 1  */
  	char		   t_cnt;	 /* current count		 : 1  */
  	char		   t_clientid;	 /* who does this belong to	 : 1  */
  	char		   t_flags;	 /* properties of reservation	 : 1  */

          /* reservation array fields */
  	uint		   t_res_num;                    /* num in array : 4 */
  	uint		   t_res_num_ophdrs;		 /* num op hdrs  : 4 */
  	uint		   t_res_arr_sum;		 /* array sum    : 4 */
  	uint		   t_res_o_flow;		 /* sum overflow : 4 */
  	xlog_res_t	   t_res_arr[XLOG_TIC_LEN_MAX];  /* array of res : 8 * 15 */ 
  } xlog_ticket_t;
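
  /*
   * Illustrative sketch, not part of this header: how the reservation
   * array fields above are typically filled in.  This is a simplified
   * rendering of the region accounting done in xfs_log.c
   * (xlog_tic_add_region()), not the exact code.
   *
   *	if (tic->t_res_num == XLOG_TIC_LEN_MAX) {
   *		// array full: fold the sum into the overflow counter
   *		tic->t_res_o_flow += tic->t_res_arr_sum;
   *		tic->t_res_num = 0;
   *		tic->t_res_arr_sum = 0;
   *	}
   *	tic->t_res_arr_sum += len;
   *	tic->t_res_arr[tic->t_res_num].r_len = len;
   *	tic->t_res_arr[tic->t_res_num].r_type = type;
   *	tic->t_res_num++;
   */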

  /*
   * - A log record header is 512 bytes.  There is plenty of room to grow the
   *	xlog_rec_header_t into the reserved space.
   * - ic_data follows, so a write to disk can start at the beginning of
   *	the iclog.
   * - ic_force_wait is used to implement synchronous forcing of the iclog to disk.
   * - ic_next is the pointer to the next iclog in the ring.
   * - ic_log is a pointer back to the global log structure.
   * - ic_size is the full size of the log buffer, minus the cycle headers.
   * - ic_offset is the current number of bytes written to in this iclog.
   * - ic_refcnt is bumped when someone is writing to the log.
   * - ic_state is the state of the iclog.
   *
   * Because of cacheline contention on large machines, we need to separate
   * various resources onto different cachelines. To start with, make the
   * structure cacheline aligned. The following fields can be contended on
   * by independent processes:
   *
   *	- ic_callbacks
   *	- ic_refcnt
   *	- fields protected by the global l_icloglock
   *
   * so we need to ensure that these fields are located in separate cachelines.
   * We'll put all the read-only and l_icloglock fields in the first cacheline,
   * and move everything else out to subsequent cachelines.
   */
  typedef struct xlog_in_core {
  	wait_queue_head_t	ic_force_wait;
  	wait_queue_head_t	ic_write_wait;
  	struct xlog_in_core	*ic_next;
  	struct xlog_in_core	*ic_prev;
  	struct xlog		*ic_log;
  	u32			ic_size;
  	u32			ic_offset;
  	enum xlog_iclog_state	ic_state;
  	char			*ic_datap;	/* pointer to iclog data */
  
  	/* Callback structures need their own cacheline */
  	spinlock_t		ic_callback_lock ____cacheline_aligned_in_smp;
  	struct list_head	ic_callbacks;
  
  	/* reference counts need their own cacheline */
  	atomic_t		ic_refcnt ____cacheline_aligned_in_smp;
  	xlog_in_core_2_t	*ic_data;
  #define ic_header	ic_data->hic_header
  #ifdef DEBUG
  	bool			ic_fail_crc : 1;
  #endif
  	struct semaphore	ic_sema;
  	struct work_struct	ic_end_io_work;
  	struct bio		ic_bio;
  	struct bio_vec		ic_bvec[];
  } xlog_in_core_t;
  
  /*
   * The CIL context is used to aggregate per-transaction details as well as be
   * passed to the iclog for checkpoint post-commit processing.  After being
   * passed to the iclog, another context needs to be allocated for tracking the
   * next set of transactions to be aggregated into a checkpoint.
   */
  struct xfs_cil;
  
  struct xfs_cil_ctx {
  	struct xfs_cil		*cil;
  	xfs_lsn_t		sequence;	/* chkpt sequence # */
  	xfs_lsn_t		start_lsn;	/* first LSN of chkpt commit */
  	xfs_lsn_t		commit_lsn;	/* chkpt commit record lsn */
  	struct xlog_ticket	*ticket;	/* chkpt ticket */
  	int			nvecs;		/* number of regions */
  	int			space_used;	/* aggregate size of regions */
  	struct list_head	busy_extents;	/* busy extents in chkpt */
  	struct xfs_log_vec	*lv_chain;	/* logvecs being pushed */
  	struct list_head	iclog_entry;
  	struct list_head	committing;	/* ctx committing list */
  	struct work_struct	discard_endio_work;
  };
  
  /*
   * Committed Item List structure
   *
   * This structure is used to track log items that have been committed but not
   * yet written into the log. It is used only when the delayed logging mount
   * option is enabled.
   *
   * This structure tracks the list of committing checkpoint contexts so
   * we can avoid the problem of having to hold out new transactions during a
   * flush until we have the commit record LSN of the checkpoint. We can
   * traverse the list of committing contexts in xlog_cil_force_lsn() to find a
   * sequence match and extract the commit LSN directly from there. If the
   * checkpoint is still in the process of committing, we can block waiting for
   * the commit LSN to be determined as well. This should make synchronous
   * operations almost as efficient as the old logging methods.
   */
  struct xfs_cil {
  	struct xlog		*xc_log;
  	struct list_head	xc_cil;
  	spinlock_t		xc_cil_lock;
  
  	struct rw_semaphore	xc_ctx_lock ____cacheline_aligned_in_smp;
  	struct xfs_cil_ctx	*xc_ctx;
  
  	spinlock_t		xc_push_lock ____cacheline_aligned_in_smp;
  	xfs_lsn_t		xc_push_seq;
  	struct list_head	xc_committing;
  	wait_queue_head_t	xc_commit_wait;
  	xfs_lsn_t		xc_current_sequence;
  	struct work_struct	xc_push_work;
  	wait_queue_head_t	xc_push_wait;	/* background push throttle */
  } ____cacheline_aligned_in_smp;
  
  /*
   * The amount of log space we allow the CIL to aggregate is difficult to size.
   * Whatever we choose, we have to make sure we can get a reservation for the
   * log space effectively, that it is large enough to capture sufficient
   * relogging to reduce log buffer IO significantly, but it is not too large for
   * the log or induces too much latency when writing out through the iclogs. We
   * track both space consumed and the number of vectors in the checkpoint
   * context, so we need to decide which to use for limiting.
   *
   * Every log buffer we write out during a push needs a header reserved, which
   * is at least one sector and more for v2 logs. Hence we need a reservation of
   * at least 512 bytes per 32k of log space just for the LR headers. That means
   * 16KB of reservation per megabyte of delayed logging space we will consume,
   * plus various headers.  The number of headers will vary based on the num of
   * io vectors, so limiting on a specific number of vectors is going to result
   * in transactions of varying size. IOWs, it is more consistent to track and
   * limit space consumed in the log rather than by the number of objects being
   * logged in order to prevent checkpoint ticket overruns.
   *
   * Further, use of static reservations through the log grant mechanism is
   * problematic. It introduces a lot of complexity (e.g. reserve grant vs write
   * grant) and a significant deadlock potential because regranting write space
   * can block on log pushes. Hence if we have to regrant log space during a log
   * push, we can deadlock.
   *
   * However, we can avoid this by use of a dynamic "reservation stealing"
   * technique during transaction commit whereby unused reservation space in the
   * transaction ticket is transferred to the CIL ctx commit ticket to cover the
   * space needed by the checkpoint transaction. This means that we never need to
   * specifically reserve space for the CIL checkpoint transaction, nor do we
   * need to regrant space once the checkpoint completes. This also means the
   * checkpoint transaction ticket is specific to the checkpoint context, rather
   * than the CIL itself.
   *
   * With dynamic reservations, we can effectively make up arbitrary limits for
   * the checkpoint size so long as they don't violate any other size rules.
   * Recovery imposes a rule that no transaction exceed half the log, so we are
   * limited by that.  Furthermore, the log transaction reservation subsystem
   * tries to keep 25% of the log free, so we need to keep below that limit or we
   * risk running out of free log space to start any new transactions.
   *
   * In order to keep background CIL push efficient, we only need to ensure the
   * CIL is large enough to maintain sufficient in-memory relogging to avoid
   * repeated physical writes of frequently modified metadata. If we allow the CIL
   * to grow to a substantial fraction of the log, then we may be pinning hundreds
   * of megabytes of metadata in memory until the CIL flushes. This can cause
   * issues when we are running low on memory - pinned memory cannot be reclaimed,
   * and the CIL consumes a lot of memory. Hence we need to set an upper physical
   * size limit for the CIL that limits the maximum amount of memory pinned by the
   * CIL but does not limit performance by reducing relogging efficiency
   * significantly.
   *
   * As such, the CIL push threshold ends up being the smaller of two thresholds:
   * - a threshold large enough that it allows CIL to be pushed and progress to be
   *   made without excessive blocking of incoming transaction commits. This is
   *   defined to be 12.5% of the log space - half the 25% push threshold of the
   *   AIL.
   * - small enough that it doesn't pin excessive amounts of memory but maintains
   *   close to peak relogging efficiency. This is defined to be 16x the iclog
   *   buffer window (32MB) as measurements have shown this to be roughly the
   *   point of diminishing performance increases under highly concurrent
   *   modification workloads.
   *
   * To prevent the CIL from overflowing upper commit size bounds, we introduce a
   * new threshold at which we block committing transactions until the background
   * CIL commit commences and switches to a new context. While this is not a hard
   * limit, it forces the process committing a transaction to the CIL to block and
   * yield the CPU, giving the CIL push work a chance to be scheduled and start
   * work. This prevents a process running lots of transactions from overfilling
   * the CIL because it is not yielding the CPU. We set the blocking limit at
   * twice the background push space threshold so we keep in line with the AIL
   * push thresholds.
   *
   * Note: this is not a -hard- limit as blocking is applied after the transaction
   * is inserted into the CIL and the push has been triggered. It is largely a
   * throttling mechanism that allows the CIL push to be scheduled and run. A hard
   * limit will be difficult to implement without introducing global serialisation
   * in the CIL commit fast path, and it's not at all clear that we actually need
   * such hard limits given the ~7 years we've run without a hard limit before
   * finding the first situation where a checkpoint size overflow actually
   * occurred. Hence the simple throttle, and an ASSERT check to tell us that
   * we've overrun the max size.
   */
  #define XLOG_CIL_SPACE_LIMIT(log)	\
  	min_t(int, (log)->l_logsize >> 3, BBTOB(XLOG_TOTAL_REC_SHIFT(log)) << 4)

  #define XLOG_CIL_BLOCKING_SPACE_LIMIT(log)	\
  	(XLOG_CIL_SPACE_LIMIT(log) * 2)
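
  /*
   * Illustrative sketch, not part of this header: how the two limits
   * above are typically applied when a transaction commits into the CIL.
   * This is a simplification of the background push logic in
   * xfs_log_cil.c (xlog_cil_push_background()).
   *
   *	if (cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log))
   *		return;			// nothing to push yet
   *
   *	// over the background limit: kick the push work
   *	queue_work(..., &cil->xc_push_work);
   *
   *	// well over the limit: throttle the committer until the push
   *	// switches to a new context
   *	if (cil->xc_ctx->space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log))
   *		xlog_wait(&cil->xc_push_wait, &cil->xc_push_lock);
   */
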
  /*
   * ticket grant locks, queues and accounting have their own cachelines
   * as these are quite hot and can be operated on concurrently.
   */
  struct xlog_grant_head {
  	spinlock_t		lock ____cacheline_aligned_in_smp;
  	struct list_head	waiters;
  	atomic64_t		grant;
  };
  
  /*
   * The reservation head lsn is not made up of a cycle number and block number.
   * Instead, it uses a cycle number and byte number.  Logs don't expect to
   * overflow 31 bits worth of byte offset, so using a byte number will mean
   * that round off problems won't occur when releasing partial reservations.
   */
  struct xlog {
  	/* The following fields don't need locking */
  	struct xfs_mount	*l_mp;	        /* mount point */
  	struct xfs_ail		*l_ailp;	/* AIL log is working with */
  	struct xfs_cil		*l_cilp;	/* CIL log is working with */
  	struct xfs_buftarg	*l_targ;        /* buftarg of log */
  	struct workqueue_struct	*l_ioend_workqueue; /* for I/O completions */
  	struct delayed_work	l_work;		/* background flush work */
  	uint			l_flags;
  	uint			l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */
  	struct list_head	*l_buf_cancel_table;
  	int			l_iclog_hsize;  /* size of iclog header */
  	int			l_iclog_heads;  /* # of iclog header sectors */
  	uint			l_sectBBsize;   /* sector size in BBs (2^n) */
  	int			l_iclog_size;	/* size of each iclog in bytes */
  	int			l_iclog_bufs;	/* number of iclog buffers */
  	xfs_daddr_t		l_logBBstart;   /* start block of log */
  	int			l_logsize;      /* size of log in bytes */
  	int			l_logBBsize;    /* size of log in BB chunks */
  	/* The following block of fields are changed while holding icloglock */
  	wait_queue_head_t	l_flush_wait ____cacheline_aligned_in_smp;
  						/* waiting for iclog flush */
  	int			l_covered_state;/* state of "covering disk
  						 * log entries" */
  	xlog_in_core_t		*l_iclog;       /* head log queue	*/
  	spinlock_t		l_icloglock;    /* grab to change iclog state */
  	int			l_curr_cycle;   /* Cycle number of log writes */
  	int			l_prev_cycle;   /* Cycle number before last
  						 * block increment */
  	int			l_curr_block;   /* current logical log block */
  	int			l_prev_block;   /* previous logical log block */

  	/*
  	 * l_last_sync_lsn and l_tail_lsn are atomics so they can be set and
  	 * read without needing to hold specific locks. To avoid operations
  	 * contending with other hot objects, place each of them on a separate
  	 * cacheline.
  	 */
  	/* lsn of last LR on disk */
  	atomic64_t		l_last_sync_lsn ____cacheline_aligned_in_smp;
  	/* lsn of 1st LR with unflushed buffers */
  	atomic64_t		l_tail_lsn ____cacheline_aligned_in_smp;

  	struct xlog_grant_head	l_reserve_head;
  	struct xlog_grant_head	l_write_head;

  	struct xfs_kobj		l_kobj;
  	/* The following field is used for debugging; need to hold icloglock */
  #ifdef DEBUG
  	void			*l_iclog_bak[XLOG_MAX_ICLOGS];
  #endif
  	/* log recovery lsn tracking (for buffer submission) */
  	xfs_lsn_t		l_recovery_lsn;
  };

  #define XLOG_BUF_CANCEL_BUCKET(log, blkno) \
  	((log)->l_buf_cancel_table + ((uint64_t)blkno % XLOG_BC_TABLE_SIZE))

  #define XLOG_FORCED_SHUTDOWN(log) \
  	(unlikely((log)->l_flags & XLOG_IO_ERROR))

  /* common routines */
  extern int
  xlog_recover(
  	struct xlog		*log);
  extern int
  xlog_recover_finish(
  	struct xlog		*log);
  extern void
  xlog_recover_cancel(struct xlog *);

  extern __le32	 xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead,
  			    char *dp, int size);

  extern kmem_zone_t *xfs_log_ticket_zone;
  struct xlog_ticket *
  xlog_ticket_alloc(
  	struct xlog	*log,
  	int		unit_bytes,
  	int		count,
  	char		client,
  	bool		permanent);

  static inline void
  xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes)
  {
  	*ptr += bytes;
  	*len -= bytes;
  	*off += bytes;
  }
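
  /*
   * Illustrative sketch, not part of this header: typical use of
   * xlog_write_adv_cnt() when copying a log vector region into an iclog
   * data buffer from within xlog_write().
   *
   *	memcpy(ptr, reg->i_addr, reg->i_len);
   *	xlog_write_adv_cnt(&ptr, &len, &log_offset, reg->i_len);
   */
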
  void	xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket);
  void	xlog_print_trans(struct xfs_trans *);
  int	xlog_write(struct xlog *log, struct xfs_log_vec *log_vector,
  		struct xlog_ticket *tic, xfs_lsn_t *start_lsn,
  		struct xlog_in_core **commit_iclog, uint flags,
  		bool need_start_rec);
  int	xlog_commit_record(struct xlog *log, struct xlog_ticket *ticket,
  		struct xlog_in_core **iclog, xfs_lsn_t *lsn);
  void	xfs_log_ticket_ungrant(struct xlog *log, struct xlog_ticket *ticket);
  void	xfs_log_ticket_regrant(struct xlog *log, struct xlog_ticket *ticket);
  
  /*
   * When we crack an atomic LSN, we sample it first so that the value will not
   * change while we are cracking it into the component values. This means we
   * will always get consistent component values to work from. This should always
   * be used to sample and crack LSNs that are stored and updated in atomic
   * variables.
   */
  static inline void
  xlog_crack_atomic_lsn(atomic64_t *lsn, uint *cycle, uint *block)
  {
  	xfs_lsn_t val = atomic64_read(lsn);
  
  	*cycle = CYCLE_LSN(val);
  	*block = BLOCK_LSN(val);
  }
  
  /*
   * Calculate and assign a value to an atomic LSN variable from component pieces.
   */
  static inline void
  xlog_assign_atomic_lsn(atomic64_t *lsn, uint cycle, uint block)
  {
  	atomic64_set(lsn, xlog_assign_lsn(cycle, block));
  }
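
  /*
   * Illustrative sketch, not part of this header: typical use of the
   * helpers above, e.g. sampling the log tail when calculating free log
   * space.  Variable names are only for illustration.
   *
   *	uint tail_cycle, tail_block;
   *
   *	xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_block);
   */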
  
  /*
   * When we crack the grant head, we sample it first so that the value will not
   * change while we are cracking it into the component values. This means we
   * will always get consistent component values to work from.
   */
  static inline void
  xlog_crack_grant_head_val(int64_t val, int *cycle, int *space)
  {
  	*cycle = val >> 32;
  	*space = val & 0xffffffff;
  }
  
  static inline void
  xlog_crack_grant_head(atomic64_t *head, int *cycle, int *space)
  {
  	xlog_crack_grant_head_val(atomic64_read(head), cycle, space);
  }
  
  static inline int64_t
  xlog_assign_grant_head_val(int cycle, int space)
  {
  	return ((int64_t)cycle << 32) | space;
  }
  
  static inline void
  xlog_assign_grant_head(atomic64_t *head, int cycle, int space)
  {
  	atomic64_set(head, xlog_assign_grant_head_val(cycle, space));
  }
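
  /*
   * Illustrative sketch, not part of this header: the crack and assign
   * helpers above are combined with a compare-and-exchange loop when a
   * grant head is moved forward, roughly as the grant space accounting
   * in xfs_log.c does it.  Simplified, assuming at most a single wrap:
   *
   *	int64_t old, new;
   *	int	cycle, space;
   *
   *	do {
   *		old = atomic64_read(head);
   *		xlog_crack_grant_head_val(old, &cycle, &space);
   *		space += bytes;
   *		if (space >= log->l_logsize) {
   *			space -= log->l_logsize;
   *			cycle++;
   *		}
   *		new = xlog_assign_grant_head_val(cycle, space);
   *	} while (atomic64_cmpxchg(head, old, new) != old);
   */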
  
  /*
   * Committed Item List interfaces
   */
  int	xlog_cil_init(struct xlog *log);
  void	xlog_cil_init_post_recovery(struct xlog *log);
  void	xlog_cil_destroy(struct xlog *log);
  bool	xlog_cil_empty(struct xlog *log);

  /*
   * CIL force routines
   */
  xfs_lsn_t
  xlog_cil_force_lsn(
  	struct xlog *log,
  	xfs_lsn_t sequence);
  
  static inline void
  xlog_cil_force(struct xlog *log)
  {
  	xlog_cil_force_lsn(log, log->l_cilp->xc_current_sequence);
  }

  /*
   * Wrapper function for waiting on a wait queue serialised against wakeups
   * by a spinlock. This matches the semantics of all the wait queues used in the
   * log code.
   */
  static inline void
  xlog_wait(
  	struct wait_queue_head	*wq,
  	struct spinlock		*lock)
  		__releases(lock)
  {
  	DECLARE_WAITQUEUE(wait, current);
  
  	add_wait_queue_exclusive(wq, &wait);
  	__set_current_state(TASK_UNINTERRUPTIBLE);
  	spin_unlock(lock);
  	schedule();
  	remove_wait_queue(wq, &wait);
  }
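
  /*
   * Illustrative sketch, not part of this header: the usual calling
   * pattern.  The waiter tests its condition under the lock and then
   * sleeps via xlog_wait(), which drops that lock before scheduling;
   * the waker holds the same lock around its wake_up().
   *
   *	spin_lock(&log->l_icloglock);
   *	if (iclog_is_still_syncing) {
   *		xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
   *		// l_icloglock has been dropped here
   *	} else {
   *		spin_unlock(&log->l_icloglock);
   *	}
   */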

  /*
   * The LSN is valid so long as it is behind the current LSN. If it isn't, this
   * means that the next log record that includes this metadata could have a
   * smaller LSN. In turn, this means that the modification in the log would not
   * replay.
   */
  static inline bool
  xlog_valid_lsn(
  	struct xlog	*log,
  	xfs_lsn_t	lsn)
  {
  	int		cur_cycle;
  	int		cur_block;
  	bool		valid = true;
  
  	/*
  	 * First, sample the current lsn without locking to avoid added
  	 * contention from metadata I/O. The current cycle and block are updated
  	 * (in xlog_state_switch_iclogs()) and read here in a particular order
  	 * to avoid false negatives (e.g., thinking the metadata LSN is valid
  	 * when it is not).
  	 *
  	 * The current block is always rewound before the cycle is bumped in
  	 * xlog_state_switch_iclogs() to ensure the current LSN is never seen in
  	 * a transiently forward state. Instead, we can see the LSN in a
  	 * transiently behind state if we happen to race with a cycle wrap.
  	 */
  	cur_cycle = READ_ONCE(log->l_curr_cycle);
  	smp_rmb();
  	cur_block = READ_ONCE(log->l_curr_block);
  
  	if ((CYCLE_LSN(lsn) > cur_cycle) ||
  	    (CYCLE_LSN(lsn) == cur_cycle && BLOCK_LSN(lsn) > cur_block)) {
  		/*
  		 * If the metadata LSN appears invalid, it's possible the check
  		 * above raced with a wrap to the next log cycle. Grab the lock
  		 * to check for sure.
  		 */
  		spin_lock(&log->l_icloglock);
  		cur_cycle = log->l_curr_cycle;
  		cur_block = log->l_curr_block;
  		spin_unlock(&log->l_icloglock);
  
  		if ((CYCLE_LSN(lsn) > cur_cycle) ||
  		    (CYCLE_LSN(lsn) == cur_cycle && BLOCK_LSN(lsn) > cur_block))
  			valid = false;
  	}
  
  	return valid;
  }
  #endif	/* __XFS_LOG_PRIV_H__ */