Commit 891456227881da9c565c455010380a40d385a478

Authored by Marcin Slusarz
Committed by Dave Kleikamp
1 parent 96b5a46e2a

jfs: le*_add_cpu conversion

replace all:
little_endian_variable = cpu_to_leX(leX_to_cpu(little_endian_variable) +
                                        expression_in_cpu_byteorder);
with:
        leX_add_cpu(&little_endian_variable, expression_in_cpu_byteorder);
generated with semantic patch

Signed-off-by: Marcin Slusarz <marcin.slusarz@gmail.com>
Signed-off-by: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
Cc: jfs-discussion@lists.sourceforge.net

Showing 3 changed files with 19 additions and 33 deletions Inline Diff

1 /* 1 /*
2 * Copyright (C) International Business Machines Corp., 2000-2004 2 * Copyright (C) International Business Machines Corp., 2000-2004
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or 6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version. 7 * (at your option) any later version.
8 * 8 *
9 * This program is distributed in the hope that it will be useful, 9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
12 * the GNU General Public License for more details. 12 * the GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */ 17 */
18 18
19 #include <linux/fs.h> 19 #include <linux/fs.h>
20 #include "jfs_incore.h" 20 #include "jfs_incore.h"
21 #include "jfs_superblock.h" 21 #include "jfs_superblock.h"
22 #include "jfs_dmap.h" 22 #include "jfs_dmap.h"
23 #include "jfs_imap.h" 23 #include "jfs_imap.h"
24 #include "jfs_lock.h" 24 #include "jfs_lock.h"
25 #include "jfs_metapage.h" 25 #include "jfs_metapage.h"
26 #include "jfs_debug.h" 26 #include "jfs_debug.h"
27 27
28 /* 28 /*
29 * SERIALIZATION of the Block Allocation Map. 29 * SERIALIZATION of the Block Allocation Map.
30 * 30 *
31 * the working state of the block allocation map is accessed in 31 * the working state of the block allocation map is accessed in
32 * two directions: 32 * two directions:
33 * 33 *
34 * 1) allocation and free requests that start at the dmap 34 * 1) allocation and free requests that start at the dmap
35 * level and move up through the dmap control pages (i.e. 35 * level and move up through the dmap control pages (i.e.
36 * the vast majority of requests). 36 * the vast majority of requests).
37 * 37 *
38 * 2) allocation requests that start at dmap control page 38 * 2) allocation requests that start at dmap control page
39 * level and work down towards the dmaps. 39 * level and work down towards the dmaps.
40 * 40 *
41 * the serialization scheme used here is as follows. 41 * the serialization scheme used here is as follows.
42 * 42 *
43 * requests which start at the bottom are serialized against each 43 * requests which start at the bottom are serialized against each
44 * other through buffers and each requests holds onto its buffers 44 * other through buffers and each requests holds onto its buffers
45 * as it works it way up from a single dmap to the required level 45 * as it works it way up from a single dmap to the required level
46 * of dmap control page. 46 * of dmap control page.
47 * requests that start at the top are serialized against each other 47 * requests that start at the top are serialized against each other
48 * and request that start from the bottom by the multiple read/single 48 * and request that start from the bottom by the multiple read/single
49 * write inode lock of the bmap inode. requests starting at the top 49 * write inode lock of the bmap inode. requests starting at the top
50 * take this lock in write mode while request starting at the bottom 50 * take this lock in write mode while request starting at the bottom
51 * take the lock in read mode. a single top-down request may proceed 51 * take the lock in read mode. a single top-down request may proceed
52 * exclusively while multiple bottoms-up requests may proceed 52 * exclusively while multiple bottoms-up requests may proceed
53 * simultaneously (under the protection of busy buffers). 53 * simultaneously (under the protection of busy buffers).
54 * 54 *
55 * in addition to information found in dmaps and dmap control pages, 55 * in addition to information found in dmaps and dmap control pages,
56 * the working state of the block allocation map also includes read/ 56 * the working state of the block allocation map also includes read/
57 * write information maintained in the bmap descriptor (i.e. total 57 * write information maintained in the bmap descriptor (i.e. total
58 * free block count, allocation group level free block counts). 58 * free block count, allocation group level free block counts).
59 * a single exclusive lock (BMAP_LOCK) is used to guard this information 59 * a single exclusive lock (BMAP_LOCK) is used to guard this information
60 * in the face of multiple-bottoms up requests. 60 * in the face of multiple-bottoms up requests.
61 * (lock ordering: IREAD_LOCK, BMAP_LOCK); 61 * (lock ordering: IREAD_LOCK, BMAP_LOCK);
62 * 62 *
63 * accesses to the persistent state of the block allocation map (limited 63 * accesses to the persistent state of the block allocation map (limited
64 * to the persistent bitmaps in dmaps) is guarded by (busy) buffers. 64 * to the persistent bitmaps in dmaps) is guarded by (busy) buffers.
65 */ 65 */
66 66
67 #define BMAP_LOCK_INIT(bmp) mutex_init(&bmp->db_bmaplock) 67 #define BMAP_LOCK_INIT(bmp) mutex_init(&bmp->db_bmaplock)
68 #define BMAP_LOCK(bmp) mutex_lock(&bmp->db_bmaplock) 68 #define BMAP_LOCK(bmp) mutex_lock(&bmp->db_bmaplock)
69 #define BMAP_UNLOCK(bmp) mutex_unlock(&bmp->db_bmaplock) 69 #define BMAP_UNLOCK(bmp) mutex_unlock(&bmp->db_bmaplock)
70 70
71 /* 71 /*
72 * forward references 72 * forward references
73 */ 73 */
74 static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno, 74 static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
75 int nblocks); 75 int nblocks);
76 static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval); 76 static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval);
77 static int dbBackSplit(dmtree_t * tp, int leafno); 77 static int dbBackSplit(dmtree_t * tp, int leafno);
78 static int dbJoin(dmtree_t * tp, int leafno, int newval); 78 static int dbJoin(dmtree_t * tp, int leafno, int newval);
79 static void dbAdjTree(dmtree_t * tp, int leafno, int newval); 79 static void dbAdjTree(dmtree_t * tp, int leafno, int newval);
80 static int dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, 80 static int dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc,
81 int level); 81 int level);
82 static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results); 82 static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results);
83 static int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno, 83 static int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno,
84 int nblocks); 84 int nblocks);
85 static int dbAllocNear(struct bmap * bmp, struct dmap * dp, s64 blkno, 85 static int dbAllocNear(struct bmap * bmp, struct dmap * dp, s64 blkno,
86 int nblocks, 86 int nblocks,
87 int l2nb, s64 * results); 87 int l2nb, s64 * results);
88 static int dbAllocDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, 88 static int dbAllocDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
89 int nblocks); 89 int nblocks);
90 static int dbAllocDmapLev(struct bmap * bmp, struct dmap * dp, int nblocks, 90 static int dbAllocDmapLev(struct bmap * bmp, struct dmap * dp, int nblocks,
91 int l2nb, 91 int l2nb,
92 s64 * results); 92 s64 * results);
93 static int dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, 93 static int dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb,
94 s64 * results); 94 s64 * results);
95 static int dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, 95 static int dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno,
96 s64 * results); 96 s64 * results);
97 static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks); 97 static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks);
98 static int dbFindBits(u32 word, int l2nb); 98 static int dbFindBits(u32 word, int l2nb);
99 static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno); 99 static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno);
100 static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx); 100 static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx);
101 static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno, 101 static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
102 int nblocks); 102 int nblocks);
103 static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, 103 static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
104 int nblocks); 104 int nblocks);
105 static int dbMaxBud(u8 * cp); 105 static int dbMaxBud(u8 * cp);
106 s64 dbMapFileSizeToMapSize(struct inode *ipbmap); 106 s64 dbMapFileSizeToMapSize(struct inode *ipbmap);
107 static int blkstol2(s64 nb); 107 static int blkstol2(s64 nb);
108 108
109 static int cntlz(u32 value); 109 static int cntlz(u32 value);
110 static int cnttz(u32 word); 110 static int cnttz(u32 word);
111 111
112 static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno, 112 static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno,
113 int nblocks); 113 int nblocks);
114 static int dbInitDmap(struct dmap * dp, s64 blkno, int nblocks); 114 static int dbInitDmap(struct dmap * dp, s64 blkno, int nblocks);
115 static int dbInitDmapTree(struct dmap * dp); 115 static int dbInitDmapTree(struct dmap * dp);
116 static int dbInitTree(struct dmaptree * dtp); 116 static int dbInitTree(struct dmaptree * dtp);
117 static int dbInitDmapCtl(struct dmapctl * dcp, int level, int i); 117 static int dbInitDmapCtl(struct dmapctl * dcp, int level, int i);
118 static int dbGetL2AGSize(s64 nblocks); 118 static int dbGetL2AGSize(s64 nblocks);
119 119
120 /* 120 /*
121 * buddy table 121 * buddy table
122 * 122 *
123 * table used for determining buddy sizes within characters of 123 * table used for determining buddy sizes within characters of
124 * dmap bitmap words. the characters themselves serve as indexes 124 * dmap bitmap words. the characters themselves serve as indexes
125 * into the table, with the table elements yielding the maximum 125 * into the table, with the table elements yielding the maximum
126 * binary buddy of free bits within the character. 126 * binary buddy of free bits within the character.
127 */ 127 */
/* maximum binary buddy size (log2) of the free bits within each possible
 * 8-bit bitmap character; index 0x00 (all free) -> 3, index 0xff (all
 * allocated) -> -1.
 */
static const s8 budtab[256] = {
	3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
	2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
	2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, -1
};
146 146
147 147
148 /* 148 /*
149 * NAME: dbMount() 149 * NAME: dbMount()
150 * 150 *
151 * FUNCTION: initializate the block allocation map. 151 * FUNCTION: initializate the block allocation map.
152 * 152 *
153 * memory is allocated for the in-core bmap descriptor and 153 * memory is allocated for the in-core bmap descriptor and
154 * the in-core descriptor is initialized from disk. 154 * the in-core descriptor is initialized from disk.
155 * 155 *
156 * PARAMETERS: 156 * PARAMETERS:
157 * ipbmap - pointer to in-core inode for the block map. 157 * ipbmap - pointer to in-core inode for the block map.
158 * 158 *
159 * RETURN VALUES: 159 * RETURN VALUES:
160 * 0 - success 160 * 0 - success
161 * -ENOMEM - insufficient memory 161 * -ENOMEM - insufficient memory
162 * -EIO - i/o error 162 * -EIO - i/o error
163 */ 163 */
164 int dbMount(struct inode *ipbmap) 164 int dbMount(struct inode *ipbmap)
165 { 165 {
166 struct bmap *bmp; 166 struct bmap *bmp;
167 struct dbmap_disk *dbmp_le; 167 struct dbmap_disk *dbmp_le;
168 struct metapage *mp; 168 struct metapage *mp;
169 int i; 169 int i;
170 170
171 /* 171 /*
172 * allocate/initialize the in-memory bmap descriptor 172 * allocate/initialize the in-memory bmap descriptor
173 */ 173 */
174 /* allocate memory for the in-memory bmap descriptor */ 174 /* allocate memory for the in-memory bmap descriptor */
175 bmp = kmalloc(sizeof(struct bmap), GFP_KERNEL); 175 bmp = kmalloc(sizeof(struct bmap), GFP_KERNEL);
176 if (bmp == NULL) 176 if (bmp == NULL)
177 return -ENOMEM; 177 return -ENOMEM;
178 178
179 /* read the on-disk bmap descriptor. */ 179 /* read the on-disk bmap descriptor. */
180 mp = read_metapage(ipbmap, 180 mp = read_metapage(ipbmap,
181 BMAPBLKNO << JFS_SBI(ipbmap->i_sb)->l2nbperpage, 181 BMAPBLKNO << JFS_SBI(ipbmap->i_sb)->l2nbperpage,
182 PSIZE, 0); 182 PSIZE, 0);
183 if (mp == NULL) { 183 if (mp == NULL) {
184 kfree(bmp); 184 kfree(bmp);
185 return -EIO; 185 return -EIO;
186 } 186 }
187 187
188 /* copy the on-disk bmap descriptor to its in-memory version. */ 188 /* copy the on-disk bmap descriptor to its in-memory version. */
189 dbmp_le = (struct dbmap_disk *) mp->data; 189 dbmp_le = (struct dbmap_disk *) mp->data;
190 bmp->db_mapsize = le64_to_cpu(dbmp_le->dn_mapsize); 190 bmp->db_mapsize = le64_to_cpu(dbmp_le->dn_mapsize);
191 bmp->db_nfree = le64_to_cpu(dbmp_le->dn_nfree); 191 bmp->db_nfree = le64_to_cpu(dbmp_le->dn_nfree);
192 bmp->db_l2nbperpage = le32_to_cpu(dbmp_le->dn_l2nbperpage); 192 bmp->db_l2nbperpage = le32_to_cpu(dbmp_le->dn_l2nbperpage);
193 bmp->db_numag = le32_to_cpu(dbmp_le->dn_numag); 193 bmp->db_numag = le32_to_cpu(dbmp_le->dn_numag);
194 bmp->db_maxlevel = le32_to_cpu(dbmp_le->dn_maxlevel); 194 bmp->db_maxlevel = le32_to_cpu(dbmp_le->dn_maxlevel);
195 bmp->db_maxag = le32_to_cpu(dbmp_le->dn_maxag); 195 bmp->db_maxag = le32_to_cpu(dbmp_le->dn_maxag);
196 bmp->db_agpref = le32_to_cpu(dbmp_le->dn_agpref); 196 bmp->db_agpref = le32_to_cpu(dbmp_le->dn_agpref);
197 bmp->db_aglevel = le32_to_cpu(dbmp_le->dn_aglevel); 197 bmp->db_aglevel = le32_to_cpu(dbmp_le->dn_aglevel);
198 bmp->db_agheigth = le32_to_cpu(dbmp_le->dn_agheigth); 198 bmp->db_agheigth = le32_to_cpu(dbmp_le->dn_agheigth);
199 bmp->db_agwidth = le32_to_cpu(dbmp_le->dn_agwidth); 199 bmp->db_agwidth = le32_to_cpu(dbmp_le->dn_agwidth);
200 bmp->db_agstart = le32_to_cpu(dbmp_le->dn_agstart); 200 bmp->db_agstart = le32_to_cpu(dbmp_le->dn_agstart);
201 bmp->db_agl2size = le32_to_cpu(dbmp_le->dn_agl2size); 201 bmp->db_agl2size = le32_to_cpu(dbmp_le->dn_agl2size);
202 for (i = 0; i < MAXAG; i++) 202 for (i = 0; i < MAXAG; i++)
203 bmp->db_agfree[i] = le64_to_cpu(dbmp_le->dn_agfree[i]); 203 bmp->db_agfree[i] = le64_to_cpu(dbmp_le->dn_agfree[i]);
204 bmp->db_agsize = le64_to_cpu(dbmp_le->dn_agsize); 204 bmp->db_agsize = le64_to_cpu(dbmp_le->dn_agsize);
205 bmp->db_maxfreebud = dbmp_le->dn_maxfreebud; 205 bmp->db_maxfreebud = dbmp_le->dn_maxfreebud;
206 206
207 /* release the buffer. */ 207 /* release the buffer. */
208 release_metapage(mp); 208 release_metapage(mp);
209 209
210 /* bind the bmap inode and the bmap descriptor to each other. */ 210 /* bind the bmap inode and the bmap descriptor to each other. */
211 bmp->db_ipbmap = ipbmap; 211 bmp->db_ipbmap = ipbmap;
212 JFS_SBI(ipbmap->i_sb)->bmap = bmp; 212 JFS_SBI(ipbmap->i_sb)->bmap = bmp;
213 213
214 memset(bmp->db_active, 0, sizeof(bmp->db_active)); 214 memset(bmp->db_active, 0, sizeof(bmp->db_active));
215 215
216 /* 216 /*
217 * allocate/initialize the bmap lock 217 * allocate/initialize the bmap lock
218 */ 218 */
219 BMAP_LOCK_INIT(bmp); 219 BMAP_LOCK_INIT(bmp);
220 220
221 return (0); 221 return (0);
222 } 222 }
223 223
224 224
225 /* 225 /*
226 * NAME: dbUnmount() 226 * NAME: dbUnmount()
227 * 227 *
228 * FUNCTION: terminate the block allocation map in preparation for 228 * FUNCTION: terminate the block allocation map in preparation for
229 * file system unmount. 229 * file system unmount.
230 * 230 *
231 * the in-core bmap descriptor is written to disk and 231 * the in-core bmap descriptor is written to disk and
232 * the memory for this descriptor is freed. 232 * the memory for this descriptor is freed.
233 * 233 *
234 * PARAMETERS: 234 * PARAMETERS:
235 * ipbmap - pointer to in-core inode for the block map. 235 * ipbmap - pointer to in-core inode for the block map.
236 * 236 *
237 * RETURN VALUES: 237 * RETURN VALUES:
238 * 0 - success 238 * 0 - success
239 * -EIO - i/o error 239 * -EIO - i/o error
240 */ 240 */
241 int dbUnmount(struct inode *ipbmap, int mounterror) 241 int dbUnmount(struct inode *ipbmap, int mounterror)
242 { 242 {
243 struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap; 243 struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap;
244 244
245 if (!(mounterror || isReadOnly(ipbmap))) 245 if (!(mounterror || isReadOnly(ipbmap)))
246 dbSync(ipbmap); 246 dbSync(ipbmap);
247 247
248 /* 248 /*
249 * Invalidate the page cache buffers 249 * Invalidate the page cache buffers
250 */ 250 */
251 truncate_inode_pages(ipbmap->i_mapping, 0); 251 truncate_inode_pages(ipbmap->i_mapping, 0);
252 252
253 /* free the memory for the in-memory bmap. */ 253 /* free the memory for the in-memory bmap. */
254 kfree(bmp); 254 kfree(bmp);
255 255
256 return (0); 256 return (0);
257 } 257 }
258 258
259 /* 259 /*
260 * dbSync() 260 * dbSync()
261 */ 261 */
262 int dbSync(struct inode *ipbmap) 262 int dbSync(struct inode *ipbmap)
263 { 263 {
264 struct dbmap_disk *dbmp_le; 264 struct dbmap_disk *dbmp_le;
265 struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap; 265 struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap;
266 struct metapage *mp; 266 struct metapage *mp;
267 int i; 267 int i;
268 268
269 /* 269 /*
270 * write bmap global control page 270 * write bmap global control page
271 */ 271 */
272 /* get the buffer for the on-disk bmap descriptor. */ 272 /* get the buffer for the on-disk bmap descriptor. */
273 mp = read_metapage(ipbmap, 273 mp = read_metapage(ipbmap,
274 BMAPBLKNO << JFS_SBI(ipbmap->i_sb)->l2nbperpage, 274 BMAPBLKNO << JFS_SBI(ipbmap->i_sb)->l2nbperpage,
275 PSIZE, 0); 275 PSIZE, 0);
276 if (mp == NULL) { 276 if (mp == NULL) {
277 jfs_err("dbSync: read_metapage failed!"); 277 jfs_err("dbSync: read_metapage failed!");
278 return -EIO; 278 return -EIO;
279 } 279 }
280 /* copy the in-memory version of the bmap to the on-disk version */ 280 /* copy the in-memory version of the bmap to the on-disk version */
281 dbmp_le = (struct dbmap_disk *) mp->data; 281 dbmp_le = (struct dbmap_disk *) mp->data;
282 dbmp_le->dn_mapsize = cpu_to_le64(bmp->db_mapsize); 282 dbmp_le->dn_mapsize = cpu_to_le64(bmp->db_mapsize);
283 dbmp_le->dn_nfree = cpu_to_le64(bmp->db_nfree); 283 dbmp_le->dn_nfree = cpu_to_le64(bmp->db_nfree);
284 dbmp_le->dn_l2nbperpage = cpu_to_le32(bmp->db_l2nbperpage); 284 dbmp_le->dn_l2nbperpage = cpu_to_le32(bmp->db_l2nbperpage);
285 dbmp_le->dn_numag = cpu_to_le32(bmp->db_numag); 285 dbmp_le->dn_numag = cpu_to_le32(bmp->db_numag);
286 dbmp_le->dn_maxlevel = cpu_to_le32(bmp->db_maxlevel); 286 dbmp_le->dn_maxlevel = cpu_to_le32(bmp->db_maxlevel);
287 dbmp_le->dn_maxag = cpu_to_le32(bmp->db_maxag); 287 dbmp_le->dn_maxag = cpu_to_le32(bmp->db_maxag);
288 dbmp_le->dn_agpref = cpu_to_le32(bmp->db_agpref); 288 dbmp_le->dn_agpref = cpu_to_le32(bmp->db_agpref);
289 dbmp_le->dn_aglevel = cpu_to_le32(bmp->db_aglevel); 289 dbmp_le->dn_aglevel = cpu_to_le32(bmp->db_aglevel);
290 dbmp_le->dn_agheigth = cpu_to_le32(bmp->db_agheigth); 290 dbmp_le->dn_agheigth = cpu_to_le32(bmp->db_agheigth);
291 dbmp_le->dn_agwidth = cpu_to_le32(bmp->db_agwidth); 291 dbmp_le->dn_agwidth = cpu_to_le32(bmp->db_agwidth);
292 dbmp_le->dn_agstart = cpu_to_le32(bmp->db_agstart); 292 dbmp_le->dn_agstart = cpu_to_le32(bmp->db_agstart);
293 dbmp_le->dn_agl2size = cpu_to_le32(bmp->db_agl2size); 293 dbmp_le->dn_agl2size = cpu_to_le32(bmp->db_agl2size);
294 for (i = 0; i < MAXAG; i++) 294 for (i = 0; i < MAXAG; i++)
295 dbmp_le->dn_agfree[i] = cpu_to_le64(bmp->db_agfree[i]); 295 dbmp_le->dn_agfree[i] = cpu_to_le64(bmp->db_agfree[i]);
296 dbmp_le->dn_agsize = cpu_to_le64(bmp->db_agsize); 296 dbmp_le->dn_agsize = cpu_to_le64(bmp->db_agsize);
297 dbmp_le->dn_maxfreebud = bmp->db_maxfreebud; 297 dbmp_le->dn_maxfreebud = bmp->db_maxfreebud;
298 298
299 /* write the buffer */ 299 /* write the buffer */
300 write_metapage(mp); 300 write_metapage(mp);
301 301
302 /* 302 /*
303 * write out dirty pages of bmap 303 * write out dirty pages of bmap
304 */ 304 */
305 filemap_write_and_wait(ipbmap->i_mapping); 305 filemap_write_and_wait(ipbmap->i_mapping);
306 306
307 diWriteSpecial(ipbmap, 0); 307 diWriteSpecial(ipbmap, 0);
308 308
309 return (0); 309 return (0);
310 } 310 }
311 311
312 312
313 /* 313 /*
314 * NAME: dbFree() 314 * NAME: dbFree()
315 * 315 *
316 * FUNCTION: free the specified block range from the working block 316 * FUNCTION: free the specified block range from the working block
317 * allocation map. 317 * allocation map.
318 * 318 *
319 * the blocks will be free from the working map one dmap 319 * the blocks will be free from the working map one dmap
320 * at a time. 320 * at a time.
321 * 321 *
322 * PARAMETERS: 322 * PARAMETERS:
323 * ip - pointer to in-core inode; 323 * ip - pointer to in-core inode;
324 * blkno - starting block number to be freed. 324 * blkno - starting block number to be freed.
325 * nblocks - number of blocks to be freed. 325 * nblocks - number of blocks to be freed.
326 * 326 *
327 * RETURN VALUES: 327 * RETURN VALUES:
328 * 0 - success 328 * 0 - success
329 * -EIO - i/o error 329 * -EIO - i/o error
330 */ 330 */
331 int dbFree(struct inode *ip, s64 blkno, s64 nblocks) 331 int dbFree(struct inode *ip, s64 blkno, s64 nblocks)
332 { 332 {
333 struct metapage *mp; 333 struct metapage *mp;
334 struct dmap *dp; 334 struct dmap *dp;
335 int nb, rc; 335 int nb, rc;
336 s64 lblkno, rem; 336 s64 lblkno, rem;
337 struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; 337 struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
338 struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap; 338 struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap;
339 339
340 IREAD_LOCK(ipbmap, RDWRLOCK_DMAP); 340 IREAD_LOCK(ipbmap, RDWRLOCK_DMAP);
341 341
342 /* block to be freed better be within the mapsize. */ 342 /* block to be freed better be within the mapsize. */
343 if (unlikely((blkno == 0) || (blkno + nblocks > bmp->db_mapsize))) { 343 if (unlikely((blkno == 0) || (blkno + nblocks > bmp->db_mapsize))) {
344 IREAD_UNLOCK(ipbmap); 344 IREAD_UNLOCK(ipbmap);
345 printk(KERN_ERR "blkno = %Lx, nblocks = %Lx\n", 345 printk(KERN_ERR "blkno = %Lx, nblocks = %Lx\n",
346 (unsigned long long) blkno, 346 (unsigned long long) blkno,
347 (unsigned long long) nblocks); 347 (unsigned long long) nblocks);
348 jfs_error(ip->i_sb, 348 jfs_error(ip->i_sb,
349 "dbFree: block to be freed is outside the map"); 349 "dbFree: block to be freed is outside the map");
350 return -EIO; 350 return -EIO;
351 } 351 }
352 352
353 /* 353 /*
354 * free the blocks a dmap at a time. 354 * free the blocks a dmap at a time.
355 */ 355 */
356 mp = NULL; 356 mp = NULL;
357 for (rem = nblocks; rem > 0; rem -= nb, blkno += nb) { 357 for (rem = nblocks; rem > 0; rem -= nb, blkno += nb) {
358 /* release previous dmap if any */ 358 /* release previous dmap if any */
359 if (mp) { 359 if (mp) {
360 write_metapage(mp); 360 write_metapage(mp);
361 } 361 }
362 362
363 /* get the buffer for the current dmap. */ 363 /* get the buffer for the current dmap. */
364 lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage); 364 lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage);
365 mp = read_metapage(ipbmap, lblkno, PSIZE, 0); 365 mp = read_metapage(ipbmap, lblkno, PSIZE, 0);
366 if (mp == NULL) { 366 if (mp == NULL) {
367 IREAD_UNLOCK(ipbmap); 367 IREAD_UNLOCK(ipbmap);
368 return -EIO; 368 return -EIO;
369 } 369 }
370 dp = (struct dmap *) mp->data; 370 dp = (struct dmap *) mp->data;
371 371
372 /* determine the number of blocks to be freed from 372 /* determine the number of blocks to be freed from
373 * this dmap. 373 * this dmap.
374 */ 374 */
375 nb = min(rem, BPERDMAP - (blkno & (BPERDMAP - 1))); 375 nb = min(rem, BPERDMAP - (blkno & (BPERDMAP - 1)));
376 376
377 /* free the blocks. */ 377 /* free the blocks. */
378 if ((rc = dbFreeDmap(bmp, dp, blkno, nb))) { 378 if ((rc = dbFreeDmap(bmp, dp, blkno, nb))) {
379 jfs_error(ip->i_sb, "dbFree: error in block map\n"); 379 jfs_error(ip->i_sb, "dbFree: error in block map\n");
380 release_metapage(mp); 380 release_metapage(mp);
381 IREAD_UNLOCK(ipbmap); 381 IREAD_UNLOCK(ipbmap);
382 return (rc); 382 return (rc);
383 } 383 }
384 } 384 }
385 385
386 /* write the last buffer. */ 386 /* write the last buffer. */
387 write_metapage(mp); 387 write_metapage(mp);
388 388
389 IREAD_UNLOCK(ipbmap); 389 IREAD_UNLOCK(ipbmap);
390 390
391 return (0); 391 return (0);
392 } 392 }
393 393
394 394
395 /* 395 /*
396 * NAME: dbUpdatePMap() 396 * NAME: dbUpdatePMap()
397 * 397 *
398 * FUNCTION: update the allocation state (free or allocate) of the 398 * FUNCTION: update the allocation state (free or allocate) of the
399 * specified block range in the persistent block allocation map. 399 * specified block range in the persistent block allocation map.
400 * 400 *
401 * the blocks will be updated in the persistent map one 401 * the blocks will be updated in the persistent map one
402 * dmap at a time. 402 * dmap at a time.
403 * 403 *
404 * PARAMETERS: 404 * PARAMETERS:
405 * ipbmap - pointer to in-core inode for the block map. 405 * ipbmap - pointer to in-core inode for the block map.
406 * free - 'true' if block range is to be freed from the persistent 406 * free - 'true' if block range is to be freed from the persistent
407 * map; 'false' if it is to be allocated. 407 * map; 'false' if it is to be allocated.
408 * blkno - starting block number of the range. 408 * blkno - starting block number of the range.
409 * nblocks - number of contiguous blocks in the range. 409 * nblocks - number of contiguous blocks in the range.
410 * tblk - transaction block; 410 * tblk - transaction block;
411 * 411 *
412 * RETURN VALUES: 412 * RETURN VALUES:
413 * 0 - success 413 * 0 - success
414 * -EIO - i/o error 414 * -EIO - i/o error
415 */ 415 */
416 int 416 int
417 dbUpdatePMap(struct inode *ipbmap, 417 dbUpdatePMap(struct inode *ipbmap,
418 int free, s64 blkno, s64 nblocks, struct tblock * tblk) 418 int free, s64 blkno, s64 nblocks, struct tblock * tblk)
419 { 419 {
420 int nblks, dbitno, wbitno, rbits; 420 int nblks, dbitno, wbitno, rbits;
421 int word, nbits, nwords; 421 int word, nbits, nwords;
422 struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap; 422 struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap;
423 s64 lblkno, rem, lastlblkno; 423 s64 lblkno, rem, lastlblkno;
424 u32 mask; 424 u32 mask;
425 struct dmap *dp; 425 struct dmap *dp;
426 struct metapage *mp; 426 struct metapage *mp;
427 struct jfs_log *log; 427 struct jfs_log *log;
428 int lsn, difft, diffp; 428 int lsn, difft, diffp;
429 unsigned long flags; 429 unsigned long flags;
430 430
431 /* the blocks better be within the mapsize. */ 431 /* the blocks better be within the mapsize. */
432 if (blkno + nblocks > bmp->db_mapsize) { 432 if (blkno + nblocks > bmp->db_mapsize) {
433 printk(KERN_ERR "blkno = %Lx, nblocks = %Lx\n", 433 printk(KERN_ERR "blkno = %Lx, nblocks = %Lx\n",
434 (unsigned long long) blkno, 434 (unsigned long long) blkno,
435 (unsigned long long) nblocks); 435 (unsigned long long) nblocks);
436 jfs_error(ipbmap->i_sb, 436 jfs_error(ipbmap->i_sb,
437 "dbUpdatePMap: blocks are outside the map"); 437 "dbUpdatePMap: blocks are outside the map");
438 return -EIO; 438 return -EIO;
439 } 439 }
440 440
441 /* compute delta of transaction lsn from log syncpt */ 441 /* compute delta of transaction lsn from log syncpt */
442 lsn = tblk->lsn; 442 lsn = tblk->lsn;
443 log = (struct jfs_log *) JFS_SBI(tblk->sb)->log; 443 log = (struct jfs_log *) JFS_SBI(tblk->sb)->log;
444 logdiff(difft, lsn, log); 444 logdiff(difft, lsn, log);
445 445
446 /* 446 /*
447 * update the block state a dmap at a time. 447 * update the block state a dmap at a time.
448 */ 448 */
449 mp = NULL; 449 mp = NULL;
450 lastlblkno = 0; 450 lastlblkno = 0;
451 for (rem = nblocks; rem > 0; rem -= nblks, blkno += nblks) { 451 for (rem = nblocks; rem > 0; rem -= nblks, blkno += nblks) {
452 /* get the buffer for the current dmap. */ 452 /* get the buffer for the current dmap. */
453 lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage); 453 lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage);
454 if (lblkno != lastlblkno) { 454 if (lblkno != lastlblkno) {
455 if (mp) { 455 if (mp) {
456 write_metapage(mp); 456 write_metapage(mp);
457 } 457 }
458 458
459 mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 459 mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE,
460 0); 460 0);
461 if (mp == NULL) 461 if (mp == NULL)
462 return -EIO; 462 return -EIO;
463 metapage_wait_for_io(mp); 463 metapage_wait_for_io(mp);
464 } 464 }
465 dp = (struct dmap *) mp->data; 465 dp = (struct dmap *) mp->data;
466 466
467 /* determine the bit number and word within the dmap of 467 /* determine the bit number and word within the dmap of
468 * the starting block. also determine how many blocks 468 * the starting block. also determine how many blocks
469 * are to be updated within this dmap. 469 * are to be updated within this dmap.
470 */ 470 */
471 dbitno = blkno & (BPERDMAP - 1); 471 dbitno = blkno & (BPERDMAP - 1);
472 word = dbitno >> L2DBWORD; 472 word = dbitno >> L2DBWORD;
473 nblks = min(rem, (s64)BPERDMAP - dbitno); 473 nblks = min(rem, (s64)BPERDMAP - dbitno);
474 474
475 /* update the bits of the dmap words. the first and last 475 /* update the bits of the dmap words. the first and last
476 * words may only have a subset of their bits updated. if 476 * words may only have a subset of their bits updated. if
477 * this is the case, we'll work against that word (i.e. 477 * this is the case, we'll work against that word (i.e.
478 * partial first and/or last) only in a single pass. a 478 * partial first and/or last) only in a single pass. a
479 * single pass will also be used to update all words that 479 * single pass will also be used to update all words that
480 * are to have all their bits updated. 480 * are to have all their bits updated.
481 */ 481 */
482 for (rbits = nblks; rbits > 0; 482 for (rbits = nblks; rbits > 0;
483 rbits -= nbits, dbitno += nbits) { 483 rbits -= nbits, dbitno += nbits) {
484 /* determine the bit number within the word and 484 /* determine the bit number within the word and
485 * the number of bits within the word. 485 * the number of bits within the word.
486 */ 486 */
487 wbitno = dbitno & (DBWORD - 1); 487 wbitno = dbitno & (DBWORD - 1);
488 nbits = min(rbits, DBWORD - wbitno); 488 nbits = min(rbits, DBWORD - wbitno);
489 489
490 /* check if only part of the word is to be updated. */ 490 /* check if only part of the word is to be updated. */
491 if (nbits < DBWORD) { 491 if (nbits < DBWORD) {
492 /* update (free or allocate) the bits 492 /* update (free or allocate) the bits
493 * in this word. 493 * in this word.
494 */ 494 */
495 mask = 495 mask =
496 (ONES << (DBWORD - nbits) >> wbitno); 496 (ONES << (DBWORD - nbits) >> wbitno);
497 if (free) 497 if (free)
498 dp->pmap[word] &= 498 dp->pmap[word] &=
499 cpu_to_le32(~mask); 499 cpu_to_le32(~mask);
500 else 500 else
501 dp->pmap[word] |= 501 dp->pmap[word] |=
502 cpu_to_le32(mask); 502 cpu_to_le32(mask);
503 503
504 word += 1; 504 word += 1;
505 } else { 505 } else {
506 /* one or more words are to have all 506 /* one or more words are to have all
507 * their bits updated. determine how 507 * their bits updated. determine how
508 * many words and how many bits. 508 * many words and how many bits.
509 */ 509 */
510 nwords = rbits >> L2DBWORD; 510 nwords = rbits >> L2DBWORD;
511 nbits = nwords << L2DBWORD; 511 nbits = nwords << L2DBWORD;
512 512
513 /* update (free or allocate) the bits 513 /* update (free or allocate) the bits
514 * in these words. 514 * in these words.
515 */ 515 */
516 if (free) 516 if (free)
517 memset(&dp->pmap[word], 0, 517 memset(&dp->pmap[word], 0,
518 nwords * 4); 518 nwords * 4);
519 else 519 else
520 memset(&dp->pmap[word], (int) ONES, 520 memset(&dp->pmap[word], (int) ONES,
521 nwords * 4); 521 nwords * 4);
522 522
523 word += nwords; 523 word += nwords;
524 } 524 }
525 } 525 }
526 526
527 /* 527 /*
528 * update dmap lsn 528 * update dmap lsn
529 */ 529 */
530 if (lblkno == lastlblkno) 530 if (lblkno == lastlblkno)
531 continue; 531 continue;
532 532
533 lastlblkno = lblkno; 533 lastlblkno = lblkno;
534 534
535 LOGSYNC_LOCK(log, flags); 535 LOGSYNC_LOCK(log, flags);
536 if (mp->lsn != 0) { 536 if (mp->lsn != 0) {
537 /* inherit older/smaller lsn */ 537 /* inherit older/smaller lsn */
538 logdiff(diffp, mp->lsn, log); 538 logdiff(diffp, mp->lsn, log);
539 if (difft < diffp) { 539 if (difft < diffp) {
540 mp->lsn = lsn; 540 mp->lsn = lsn;
541 541
542 /* move bp after tblock in logsync list */ 542 /* move bp after tblock in logsync list */
543 list_move(&mp->synclist, &tblk->synclist); 543 list_move(&mp->synclist, &tblk->synclist);
544 } 544 }
545 545
546 /* inherit younger/larger clsn */ 546 /* inherit younger/larger clsn */
547 logdiff(difft, tblk->clsn, log); 547 logdiff(difft, tblk->clsn, log);
548 logdiff(diffp, mp->clsn, log); 548 logdiff(diffp, mp->clsn, log);
549 if (difft > diffp) 549 if (difft > diffp)
550 mp->clsn = tblk->clsn; 550 mp->clsn = tblk->clsn;
551 } else { 551 } else {
552 mp->log = log; 552 mp->log = log;
553 mp->lsn = lsn; 553 mp->lsn = lsn;
554 554
555 /* insert bp after tblock in logsync list */ 555 /* insert bp after tblock in logsync list */
556 log->count++; 556 log->count++;
557 list_add(&mp->synclist, &tblk->synclist); 557 list_add(&mp->synclist, &tblk->synclist);
558 558
559 mp->clsn = tblk->clsn; 559 mp->clsn = tblk->clsn;
560 } 560 }
561 LOGSYNC_UNLOCK(log, flags); 561 LOGSYNC_UNLOCK(log, flags);
562 } 562 }
563 563
564 /* write the last buffer. */ 564 /* write the last buffer. */
565 if (mp) { 565 if (mp) {
566 write_metapage(mp); 566 write_metapage(mp);
567 } 567 }
568 568
569 return (0); 569 return (0);
570 } 570 }
571 571
572 572
573 /* 573 /*
574 * NAME: dbNextAG() 574 * NAME: dbNextAG()
575 * 575 *
576 * FUNCTION: find the preferred allocation group for new allocations. 576 * FUNCTION: find the preferred allocation group for new allocations.
577 * 577 *
578 * Within the allocation groups, we maintain a preferred 578 * Within the allocation groups, we maintain a preferred
579 * allocation group which consists of a group with at least 579 * allocation group which consists of a group with at least
580 * average free space. It is the preferred group that we target 580 * average free space. It is the preferred group that we target
581 * new inode allocation towards. The tie-in between inode 581 * new inode allocation towards. The tie-in between inode
582 * allocation and block allocation occurs as we allocate the 582 * allocation and block allocation occurs as we allocate the
583 * first (data) block of an inode and specify the inode (block) 583 * first (data) block of an inode and specify the inode (block)
584 * as the allocation hint for this block. 584 * as the allocation hint for this block.
585 * 585 *
586 * We try to avoid having more than one open file growing in 586 * We try to avoid having more than one open file growing in
587 * an allocation group, as this will lead to fragmentation. 587 * an allocation group, as this will lead to fragmentation.
588 * This differs from the old OS/2 method of trying to keep 588 * This differs from the old OS/2 method of trying to keep
589 * empty ags around for large allocations. 589 * empty ags around for large allocations.
590 * 590 *
591 * PARAMETERS: 591 * PARAMETERS:
592 * ipbmap - pointer to in-core inode for the block map. 592 * ipbmap - pointer to in-core inode for the block map.
593 * 593 *
594 * RETURN VALUES: 594 * RETURN VALUES:
595 * the preferred allocation group number. 595 * the preferred allocation group number.
596 */ 596 */
597 int dbNextAG(struct inode *ipbmap) 597 int dbNextAG(struct inode *ipbmap)
598 { 598 {
599 s64 avgfree; 599 s64 avgfree;
600 int agpref; 600 int agpref;
601 s64 hwm = 0; 601 s64 hwm = 0;
602 int i; 602 int i;
603 int next_best = -1; 603 int next_best = -1;
604 struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap; 604 struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap;
605 605
606 BMAP_LOCK(bmp); 606 BMAP_LOCK(bmp);
607 607
608 /* determine the average number of free blocks within the ags. */ 608 /* determine the average number of free blocks within the ags. */
609 avgfree = (u32)bmp->db_nfree / bmp->db_numag; 609 avgfree = (u32)bmp->db_nfree / bmp->db_numag;
610 610
611 /* 611 /*
612 * if the current preferred ag does not have an active allocator 612 * if the current preferred ag does not have an active allocator
613 * and has at least average freespace, return it 613 * and has at least average freespace, return it
614 */ 614 */
615 agpref = bmp->db_agpref; 615 agpref = bmp->db_agpref;
616 if ((atomic_read(&bmp->db_active[agpref]) == 0) && 616 if ((atomic_read(&bmp->db_active[agpref]) == 0) &&
617 (bmp->db_agfree[agpref] >= avgfree)) 617 (bmp->db_agfree[agpref] >= avgfree))
618 goto unlock; 618 goto unlock;
619 619
620 /* From the last preferred ag, find the next one with at least 620 /* From the last preferred ag, find the next one with at least
621 * average free space. 621 * average free space.
622 */ 622 */
623 for (i = 0 ; i < bmp->db_numag; i++, agpref++) { 623 for (i = 0 ; i < bmp->db_numag; i++, agpref++) {
624 if (agpref == bmp->db_numag) 624 if (agpref == bmp->db_numag)
625 agpref = 0; 625 agpref = 0;
626 626
627 if (atomic_read(&bmp->db_active[agpref])) 627 if (atomic_read(&bmp->db_active[agpref]))
628 /* open file is currently growing in this ag */ 628 /* open file is currently growing in this ag */
629 continue; 629 continue;
630 if (bmp->db_agfree[agpref] >= avgfree) { 630 if (bmp->db_agfree[agpref] >= avgfree) {
631 /* Return this one */ 631 /* Return this one */
632 bmp->db_agpref = agpref; 632 bmp->db_agpref = agpref;
633 goto unlock; 633 goto unlock;
634 } else if (bmp->db_agfree[agpref] > hwm) { 634 } else if (bmp->db_agfree[agpref] > hwm) {
635 /* Less than avg. freespace, but best so far */ 635 /* Less than avg. freespace, but best so far */
636 hwm = bmp->db_agfree[agpref]; 636 hwm = bmp->db_agfree[agpref];
637 next_best = agpref; 637 next_best = agpref;
638 } 638 }
639 } 639 }
640 640
641 /* 641 /*
642 * If no inactive ag was found with average freespace, use the 642 * If no inactive ag was found with average freespace, use the
643 * next best 643 * next best
644 */ 644 */
645 if (next_best != -1) 645 if (next_best != -1)
646 bmp->db_agpref = next_best; 646 bmp->db_agpref = next_best;
647 /* else leave db_agpref unchanged */ 647 /* else leave db_agpref unchanged */
648 unlock: 648 unlock:
649 BMAP_UNLOCK(bmp); 649 BMAP_UNLOCK(bmp);
650 650
651 /* return the preferred group. 651 /* return the preferred group.
652 */ 652 */
653 return (bmp->db_agpref); 653 return (bmp->db_agpref);
654 } 654 }
655 655
/*
 * NAME:	dbAlloc()
 *
 * FUNCTION:	attempt to allocate a specified number of contiguous free
 *		blocks from the working allocation block map.
 *
 *		the block allocation policy uses hints and a multi-step
 *		approach.
 *
 *		for allocation requests smaller than the number of blocks
 *		per dmap, we first try to allocate the new blocks
 *		immediately following the hint.  if these blocks are not
 *		available, we try to allocate blocks near the hint.  if
 *		no blocks near the hint are available, we next try to
 *		allocate within the same dmap as contains the hint.
 *
 *		if no blocks are available in the dmap or the allocation
 *		request is larger than the dmap size, we try to allocate
 *		within the same allocation group as contains the hint.  if
 *		this does not succeed, we finally try to allocate anywhere
 *		within the aggregate.
 *
 *		we also try to allocate anywhere within the aggregate for
 *		allocation requests larger than the allocation group
 *		size or requests that specify no hint value.
 *
 * PARAMETERS:
 *	ip	- pointer to in-core inode;
 *	hint	- allocation hint.
 *	nblocks	- number of contiguous blocks in the range.
 *	results	- on successful return, set to the starting block number
 *		  of the newly allocated contiguous range.
 *
 * RETURN VALUES:
 *	0	- success
 *	-ENOSPC	- insufficient disk resources
 *	-EIO	- i/o error
 */
int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
{
	int rc, agno;
	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
	struct bmap *bmp;
	struct metapage *mp;
	s64 lblkno, blkno;
	struct dmap *dp;
	int l2nb;
	s64 mapSize;
	int writers;

	/* assert that nblocks is valid */
	assert(nblocks > 0);

	/* get the log2 number of blocks to be allocated.
	 * if the number of blocks is not a log2 multiple,
	 * it will be rounded up to the next log2 multiple.
	 */
	l2nb = BLKSTOL2(nblocks);

	bmp = JFS_SBI(ip->i_sb)->bmap;

	mapSize = bmp->db_mapsize;

	/* the hint should be within the map */
	if (hint >= mapSize) {
		jfs_error(ip->i_sb, "dbAlloc: the hint is outside the map");
		return -EIO;
	}

	/* if the number of blocks to be allocated is greater than the
	 * allocation group size, try to allocate anywhere.
	 * (write lock: dbAllocAny may update summary trees across ags)
	 */
	if (l2nb > bmp->db_agl2size) {
		IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP);

		rc = dbAllocAny(bmp, nblocks, l2nb, results);

		goto write_unlock;
	}

	/*
	 * If no hint, let dbNextAG recommend an allocation group
	 */
	if (hint == 0)
		goto pref_ag;

	/* we would like to allocate close to the hint.  adjust the
	 * hint to the block following the hint since the allocators
	 * will start looking for free space starting at this point.
	 */
	blkno = hint + 1;

	if (blkno >= bmp->db_mapsize)
		goto pref_ag;

	agno = blkno >> bmp->db_agl2size;

	/* check if blkno crosses over into a new allocation group.
	 * if so, check if we should allow allocations within this
	 * allocation group.
	 */
	if ((blkno & (bmp->db_agsize - 1)) == 0)
		/* check if the AG is currently being written to.
		 * if so, call dbNextAG() to find a non-busy
		 * AG with sufficient free space.
		 * (NOTE: the comment above sits between the two
		 * unbraced ifs; the inner if is the sole body of
		 * the outer one.)
		 */
		if (atomic_read(&bmp->db_active[agno]))
			goto pref_ag;

	/* check if the allocation request size can be satisfied from a
	 * single dmap.  if so, try to allocate from the dmap containing
	 * the hint using a tiered strategy.
	 */
	if (nblocks <= BPERDMAP) {
		IREAD_LOCK(ipbmap, RDWRLOCK_DMAP);

		/* get the buffer for the dmap containing the hint.
		 * rc defaults to -EIO so a failed read unwinds with
		 * the right error at read_unlock.
		 */
		rc = -EIO;
		lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage);
		mp = read_metapage(ipbmap, lblkno, PSIZE, 0);
		if (mp == NULL)
			goto read_unlock;

		dp = (struct dmap *) mp->data;

		/* first, try to satisfy the allocation request with the
		 * blocks beginning at the hint.
		 */
		if ((rc = dbAllocNext(bmp, dp, blkno, (int) nblocks))
		    != -ENOSPC) {
			if (rc == 0) {
				*results = blkno;
				mark_metapage_dirty(mp);
			}

			release_metapage(mp);
			goto read_unlock;
		}

		writers = atomic_read(&bmp->db_active[agno]);
		if ((writers > 1) ||
		    ((writers == 1) && (JFS_IP(ip)->active_ag != agno))) {
			/*
			 * Someone else is writing in this allocation
			 * group.  To avoid fragmenting, try another ag
			 */
			release_metapage(mp);
			IREAD_UNLOCK(ipbmap);
			goto pref_ag;
		}

		/* next, try to satisfy the allocation request with blocks
		 * near the hint.
		 */
		if ((rc =
		     dbAllocNear(bmp, dp, blkno, (int) nblocks, l2nb, results))
		    != -ENOSPC) {
			if (rc == 0)
				mark_metapage_dirty(mp);

			release_metapage(mp);
			goto read_unlock;
		}

		/* try to satisfy the allocation request with blocks within
		 * the same dmap as the hint.
		 */
		if ((rc = dbAllocDmapLev(bmp, dp, (int) nblocks, l2nb, results))
		    != -ENOSPC) {
			if (rc == 0)
				mark_metapage_dirty(mp);

			release_metapage(mp);
			goto read_unlock;
		}

		/* dmap exhausted: drop the page and the read lock before
		 * escalating to the ag-level (write-locked) search below.
		 */
		release_metapage(mp);
		IREAD_UNLOCK(ipbmap);
	}

	/* try to satisfy the allocation request with blocks within
	 * the same allocation group as the hint.
	 */
	IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP);
	if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results)) != -ENOSPC)
		goto write_unlock;

	IWRITE_UNLOCK(ipbmap);


      pref_ag:
	/*
	 * Let dbNextAG recommend a preferred allocation group
	 */
	agno = dbNextAG(ipbmap);
	IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP);

	/* Try to allocate within this allocation group.  if that fails, try to
	 * allocate anywhere in the map.
	 */
	if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results)) == -ENOSPC)
		rc = dbAllocAny(bmp, nblocks, l2nb, results);

      write_unlock:
	IWRITE_UNLOCK(ipbmap);

	return (rc);

      read_unlock:
	IREAD_UNLOCK(ipbmap);

	return (rc);
}
870 870
871 #ifdef _NOTYET 871 #ifdef _NOTYET
872 /* 872 /*
873 * NAME: dbAllocExact() 873 * NAME: dbAllocExact()
874 * 874 *
875 * FUNCTION: try to allocate the requested extent; 875 * FUNCTION: try to allocate the requested extent;
876 * 876 *
877 * PARAMETERS: 877 * PARAMETERS:
878 * ip - pointer to in-core inode; 878 * ip - pointer to in-core inode;
879 * blkno - extent address; 879 * blkno - extent address;
880 * nblocks - extent length; 880 * nblocks - extent length;
881 * 881 *
882 * RETURN VALUES: 882 * RETURN VALUES:
883 * 0 - success 883 * 0 - success
884 * -ENOSPC - insufficient disk resources 884 * -ENOSPC - insufficient disk resources
885 * -EIO - i/o error 885 * -EIO - i/o error
886 */ 886 */
887 int dbAllocExact(struct inode *ip, s64 blkno, int nblocks) 887 int dbAllocExact(struct inode *ip, s64 blkno, int nblocks)
888 { 888 {
889 int rc; 889 int rc;
890 struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; 890 struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
891 struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap; 891 struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap;
892 struct dmap *dp; 892 struct dmap *dp;
893 s64 lblkno; 893 s64 lblkno;
894 struct metapage *mp; 894 struct metapage *mp;
895 895
896 IREAD_LOCK(ipbmap, RDWRLOCK_DMAP); 896 IREAD_LOCK(ipbmap, RDWRLOCK_DMAP);
897 897
898 /* 898 /*
899 * validate extent request: 899 * validate extent request:
900 * 900 *
901 * note: defragfs policy: 901 * note: defragfs policy:
902 * max 64 blocks will be moved. 902 * max 64 blocks will be moved.
903 * allocation request size must be satisfied from a single dmap. 903 * allocation request size must be satisfied from a single dmap.
904 */ 904 */
905 if (nblocks <= 0 || nblocks > BPERDMAP || blkno >= bmp->db_mapsize) { 905 if (nblocks <= 0 || nblocks > BPERDMAP || blkno >= bmp->db_mapsize) {
906 IREAD_UNLOCK(ipbmap); 906 IREAD_UNLOCK(ipbmap);
907 return -EINVAL; 907 return -EINVAL;
908 } 908 }
909 909
910 if (nblocks > ((s64) 1 << bmp->db_maxfreebud)) { 910 if (nblocks > ((s64) 1 << bmp->db_maxfreebud)) {
911 /* the free space is no longer available */ 911 /* the free space is no longer available */
912 IREAD_UNLOCK(ipbmap); 912 IREAD_UNLOCK(ipbmap);
913 return -ENOSPC; 913 return -ENOSPC;
914 } 914 }
915 915
916 /* read in the dmap covering the extent */ 916 /* read in the dmap covering the extent */
917 lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage); 917 lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage);
918 mp = read_metapage(ipbmap, lblkno, PSIZE, 0); 918 mp = read_metapage(ipbmap, lblkno, PSIZE, 0);
919 if (mp == NULL) { 919 if (mp == NULL) {
920 IREAD_UNLOCK(ipbmap); 920 IREAD_UNLOCK(ipbmap);
921 return -EIO; 921 return -EIO;
922 } 922 }
923 dp = (struct dmap *) mp->data; 923 dp = (struct dmap *) mp->data;
924 924
925 /* try to allocate the requested extent */ 925 /* try to allocate the requested extent */
926 rc = dbAllocNext(bmp, dp, blkno, nblocks); 926 rc = dbAllocNext(bmp, dp, blkno, nblocks);
927 927
928 IREAD_UNLOCK(ipbmap); 928 IREAD_UNLOCK(ipbmap);
929 929
930 if (rc == 0) 930 if (rc == 0)
931 mark_metapage_dirty(mp); 931 mark_metapage_dirty(mp);
932 932
933 release_metapage(mp); 933 release_metapage(mp);
934 934
935 return (rc); 935 return (rc);
936 } 936 }
937 #endif /* _NOTYET */ 937 #endif /* _NOTYET */
938 938
939 /* 939 /*
940 * NAME: dbReAlloc() 940 * NAME: dbReAlloc()
941 * 941 *
942 * FUNCTION: attempt to extend a current allocation by a specified 942 * FUNCTION: attempt to extend a current allocation by a specified
943 * number of blocks. 943 * number of blocks.
944 * 944 *
945 * this routine attempts to satisfy the allocation request 945 * this routine attempts to satisfy the allocation request
946 * by first trying to extend the existing allocation in 946 * by first trying to extend the existing allocation in
947 * place by allocating the additional blocks as the blocks 947 * place by allocating the additional blocks as the blocks
948 * immediately following the current allocation. if these 948 * immediately following the current allocation. if these
949 * blocks are not available, this routine will attempt to 949 * blocks are not available, this routine will attempt to
950 * allocate a new set of contiguous blocks large enough 950 * allocate a new set of contiguous blocks large enough
951 * to cover the existing allocation plus the additional 951 * to cover the existing allocation plus the additional
952 * number of blocks required. 952 * number of blocks required.
953 * 953 *
954 * PARAMETERS: 954 * PARAMETERS:
955 * ip - pointer to in-core inode requiring allocation. 955 * ip - pointer to in-core inode requiring allocation.
956 * blkno - starting block of the current allocation. 956 * blkno - starting block of the current allocation.
957 * nblocks - number of contiguous blocks within the current 957 * nblocks - number of contiguous blocks within the current
958 * allocation. 958 * allocation.
959 * addnblocks - number of blocks to add to the allocation. 959 * addnblocks - number of blocks to add to the allocation.
960 * results - on successful return, set to the starting block number 960 * results - on successful return, set to the starting block number
961 * of the existing allocation if the existing allocation 961 * of the existing allocation if the existing allocation
962 * was extended in place or to a newly allocated contiguous 962 * was extended in place or to a newly allocated contiguous
963 * range if the existing allocation could not be extended 963 * range if the existing allocation could not be extended
964 * in place. 964 * in place.
965 * 965 *
966 * RETURN VALUES: 966 * RETURN VALUES:
967 * 0 - success 967 * 0 - success
968 * -ENOSPC - insufficient disk resources 968 * -ENOSPC - insufficient disk resources
969 * -EIO - i/o error 969 * -EIO - i/o error
970 */ 970 */
971 int 971 int
972 dbReAlloc(struct inode *ip, 972 dbReAlloc(struct inode *ip,
973 s64 blkno, s64 nblocks, s64 addnblocks, s64 * results) 973 s64 blkno, s64 nblocks, s64 addnblocks, s64 * results)
974 { 974 {
975 int rc; 975 int rc;
976 976
977 /* try to extend the allocation in place. 977 /* try to extend the allocation in place.
978 */ 978 */
979 if ((rc = dbExtend(ip, blkno, nblocks, addnblocks)) == 0) { 979 if ((rc = dbExtend(ip, blkno, nblocks, addnblocks)) == 0) {
980 *results = blkno; 980 *results = blkno;
981 return (0); 981 return (0);
982 } else { 982 } else {
983 if (rc != -ENOSPC) 983 if (rc != -ENOSPC)
984 return (rc); 984 return (rc);
985 } 985 }
986 986
987 /* could not extend the allocation in place, so allocate a 987 /* could not extend the allocation in place, so allocate a
988 * new set of blocks for the entire request (i.e. try to get 988 * new set of blocks for the entire request (i.e. try to get
989 * a range of contiguous blocks large enough to cover the 989 * a range of contiguous blocks large enough to cover the
990 * existing allocation plus the additional blocks.) 990 * existing allocation plus the additional blocks.)
991 */ 991 */
992 return (dbAlloc 992 return (dbAlloc
993 (ip, blkno + nblocks - 1, addnblocks + nblocks, results)); 993 (ip, blkno + nblocks - 1, addnblocks + nblocks, results));
994 } 994 }
995 995
996 996
997 /* 997 /*
998 * NAME: dbExtend() 998 * NAME: dbExtend()
999 * 999 *
1000 * FUNCTION: attempt to extend a current allocation by a specified 1000 * FUNCTION: attempt to extend a current allocation by a specified
1001 * number of blocks. 1001 * number of blocks.
1002 * 1002 *
1003 * this routine attempts to satisfy the allocation request 1003 * this routine attempts to satisfy the allocation request
1004 * by first trying to extend the existing allocation in 1004 * by first trying to extend the existing allocation in
1005 * place by allocating the additional blocks as the blocks 1005 * place by allocating the additional blocks as the blocks
1006 * immediately following the current allocation. 1006 * immediately following the current allocation.
1007 * 1007 *
1008 * PARAMETERS: 1008 * PARAMETERS:
1009 * ip - pointer to in-core inode requiring allocation. 1009 * ip - pointer to in-core inode requiring allocation.
1010 * blkno - starting block of the current allocation. 1010 * blkno - starting block of the current allocation.
1011 * nblocks - number of contiguous blocks within the current 1011 * nblocks - number of contiguous blocks within the current
1012 * allocation. 1012 * allocation.
1013 * addnblocks - number of blocks to add to the allocation. 1013 * addnblocks - number of blocks to add to the allocation.
1014 * 1014 *
1015 * RETURN VALUES: 1015 * RETURN VALUES:
1016 * 0 - success 1016 * 0 - success
1017 * -ENOSPC - insufficient disk resources 1017 * -ENOSPC - insufficient disk resources
1018 * -EIO - i/o error 1018 * -EIO - i/o error
1019 */ 1019 */
1020 static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks) 1020 static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks)
1021 { 1021 {
1022 struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); 1022 struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
1023 s64 lblkno, lastblkno, extblkno; 1023 s64 lblkno, lastblkno, extblkno;
1024 uint rel_block; 1024 uint rel_block;
1025 struct metapage *mp; 1025 struct metapage *mp;
1026 struct dmap *dp; 1026 struct dmap *dp;
1027 int rc; 1027 int rc;
1028 struct inode *ipbmap = sbi->ipbmap; 1028 struct inode *ipbmap = sbi->ipbmap;
1029 struct bmap *bmp; 1029 struct bmap *bmp;
1030 1030
1031 /* 1031 /*
1032 * We don't want a non-aligned extent to cross a page boundary 1032 * We don't want a non-aligned extent to cross a page boundary
1033 */ 1033 */
1034 if (((rel_block = blkno & (sbi->nbperpage - 1))) && 1034 if (((rel_block = blkno & (sbi->nbperpage - 1))) &&
1035 (rel_block + nblocks + addnblocks > sbi->nbperpage)) 1035 (rel_block + nblocks + addnblocks > sbi->nbperpage))
1036 return -ENOSPC; 1036 return -ENOSPC;
1037 1037
1038 /* get the last block of the current allocation */ 1038 /* get the last block of the current allocation */
1039 lastblkno = blkno + nblocks - 1; 1039 lastblkno = blkno + nblocks - 1;
1040 1040
1041 /* determine the block number of the block following 1041 /* determine the block number of the block following
1042 * the existing allocation. 1042 * the existing allocation.
1043 */ 1043 */
1044 extblkno = lastblkno + 1; 1044 extblkno = lastblkno + 1;
1045 1045
1046 IREAD_LOCK(ipbmap, RDWRLOCK_DMAP); 1046 IREAD_LOCK(ipbmap, RDWRLOCK_DMAP);
1047 1047
1048 /* better be within the file system */ 1048 /* better be within the file system */
1049 bmp = sbi->bmap; 1049 bmp = sbi->bmap;
1050 if (lastblkno < 0 || lastblkno >= bmp->db_mapsize) { 1050 if (lastblkno < 0 || lastblkno >= bmp->db_mapsize) {
1051 IREAD_UNLOCK(ipbmap); 1051 IREAD_UNLOCK(ipbmap);
1052 jfs_error(ip->i_sb, 1052 jfs_error(ip->i_sb,
1053 "dbExtend: the block is outside the filesystem"); 1053 "dbExtend: the block is outside the filesystem");
1054 return -EIO; 1054 return -EIO;
1055 } 1055 }
1056 1056
1057 /* we'll attempt to extend the current allocation in place by 1057 /* we'll attempt to extend the current allocation in place by
1058 * allocating the additional blocks as the blocks immediately 1058 * allocating the additional blocks as the blocks immediately
1059 * following the current allocation. we only try to extend the 1059 * following the current allocation. we only try to extend the
1060 * current allocation in place if the number of additional blocks 1060 * current allocation in place if the number of additional blocks
1061 * can fit into a dmap, the last block of the current allocation 1061 * can fit into a dmap, the last block of the current allocation
1062 * is not the last block of the file system, and the start of the 1062 * is not the last block of the file system, and the start of the
1063 * inplace extension is not on an allocation group boundary. 1063 * inplace extension is not on an allocation group boundary.
1064 */ 1064 */
1065 if (addnblocks > BPERDMAP || extblkno >= bmp->db_mapsize || 1065 if (addnblocks > BPERDMAP || extblkno >= bmp->db_mapsize ||
1066 (extblkno & (bmp->db_agsize - 1)) == 0) { 1066 (extblkno & (bmp->db_agsize - 1)) == 0) {
1067 IREAD_UNLOCK(ipbmap); 1067 IREAD_UNLOCK(ipbmap);
1068 return -ENOSPC; 1068 return -ENOSPC;
1069 } 1069 }
1070 1070
1071 /* get the buffer for the dmap containing the first block 1071 /* get the buffer for the dmap containing the first block
1072 * of the extension. 1072 * of the extension.
1073 */ 1073 */
1074 lblkno = BLKTODMAP(extblkno, bmp->db_l2nbperpage); 1074 lblkno = BLKTODMAP(extblkno, bmp->db_l2nbperpage);
1075 mp = read_metapage(ipbmap, lblkno, PSIZE, 0); 1075 mp = read_metapage(ipbmap, lblkno, PSIZE, 0);
1076 if (mp == NULL) { 1076 if (mp == NULL) {
1077 IREAD_UNLOCK(ipbmap); 1077 IREAD_UNLOCK(ipbmap);
1078 return -EIO; 1078 return -EIO;
1079 } 1079 }
1080 1080
1081 dp = (struct dmap *) mp->data; 1081 dp = (struct dmap *) mp->data;
1082 1082
1083 /* try to allocate the blocks immediately following the 1083 /* try to allocate the blocks immediately following the
1084 * current allocation. 1084 * current allocation.
1085 */ 1085 */
1086 rc = dbAllocNext(bmp, dp, extblkno, (int) addnblocks); 1086 rc = dbAllocNext(bmp, dp, extblkno, (int) addnblocks);
1087 1087
1088 IREAD_UNLOCK(ipbmap); 1088 IREAD_UNLOCK(ipbmap);
1089 1089
1090 /* were we successful ? */ 1090 /* were we successful ? */
1091 if (rc == 0) 1091 if (rc == 0)
1092 write_metapage(mp); 1092 write_metapage(mp);
1093 else 1093 else
1094 /* we were not successful */ 1094 /* we were not successful */
1095 release_metapage(mp); 1095 release_metapage(mp);
1096 1096
1097 1097
1098 return (rc); 1098 return (rc);
1099 } 1099 }
1100 1100
1101 1101
/*
 * NAME:	dbAllocNext()
 *
 * FUNCTION:	attempt to allocate the blocks of the specified block
 *		range within a dmap.
 *
 * PARAMETERS:
 *	bmp	-  pointer to bmap descriptor
 *	dp	-  pointer to dmap.
 *	blkno	-  starting block number of the range.
 *	nblocks	-  number of contiguous free blocks of the range.
 *
 * RETURN VALUES:
 *	0	- success
 *	-ENOSPC	- insufficient disk resources
 *	-EIO	- i/o error
 *
 * serialization: IREAD_LOCK(ipbmap) held on entry/exit;
 */
static int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno,
		       int nblocks)
{
	int dbitno, word, rembits, nb, nwords, wbitno, nw;
	int l2size;
	s8 *leaf;
	u32 mask;

	/* sanity check: if the on-disk leaf index does not match the
	 * expected constant, the dmap page is corrupt.
	 */
	if (dp->tree.leafidx != cpu_to_le32(LEAFIND)) {
		jfs_error(bmp->db_ipbmap->i_sb,
			  "dbAllocNext: Corrupt dmap page");
		return -EIO;
	}

	/* pick up a pointer to the leaves of the dmap tree.
	 */
	leaf = dp->tree.stree + le32_to_cpu(dp->tree.leafidx);

	/* determine the bit number and word within the dmap of the
	 * starting block.
	 */
	dbitno = blkno & (BPERDMAP - 1);
	word = dbitno >> L2DBWORD;

	/* check if the specified block range is contained within
	 * this dmap.
	 */
	if (dbitno + nblocks > BPERDMAP)
		return -ENOSPC;

	/* check if the starting leaf indicates that anything
	 * is free.
	 */
	if (leaf[word] == NOFREE)
		return -ENOSPC;

	/* check the dmaps words corresponding to block range to see
	 * if the block range is free.  not all bits of the first and
	 * last words may be contained within the block range.  if this
	 * is the case, we'll work against those words (i.e. partial first
	 * and/or last) on an individual basis (a single pass) and examine
	 * the actual bits to determine if they are free.  a single pass
	 * will be used for all dmap words fully contained within the
	 * specified range.  within this pass, the leaves of the dmap
	 * tree will be examined to determine if the blocks are free.  a
	 * single leaf may describe the free space of multiple dmap
	 * words, so we may visit only a subset of the actual leaves
	 * corresponding to the dmap words of the block range.
	 */
	for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) {
		/* determine the bit number within the word and
		 * the number of bits within the word.
		 */
		wbitno = dbitno & (DBWORD - 1);
		nb = min(rembits, DBWORD - wbitno);

		/* check if only part of the word is to be examined.
		 */
		if (nb < DBWORD) {
			/* check if the bits are free: mask has a 1 for
			 * each bit of the range within this word; each
			 * of those bits must be clear (free) in wmap.
			 */
			mask = (ONES << (DBWORD - nb) >> wbitno);
			if ((mask & ~le32_to_cpu(dp->wmap[word])) != mask)
				return -ENOSPC;

			word += 1;
		} else {
			/* one or more dmap words are fully contained
			 * within the block range.  determine how many
			 * words and how many bits.
			 */
			nwords = rembits >> L2DBWORD;
			nb = nwords << L2DBWORD;

			/* now examine the appropriate leaves to determine
			 * if the blocks are free.
			 */
			while (nwords > 0) {
				/* does the leaf describe any free space ?
				 * a leaf below BUDMIN cannot cover a whole
				 * free word, so the range cannot be free.
				 */
				if (leaf[word] < BUDMIN)
					return -ENOSPC;

				/* determine the l2 number of bits provided
				 * by this leaf.
				 */
				l2size =
				    min((int)leaf[word], NLSTOL2BSZ(nwords));

				/* determine how many words were handled.
				 */
				nw = BUDSIZE(l2size, BUDMIN);

				nwords -= nw;
				word += nw;
			}
		}
	}

	/* allocate the blocks.
	 */
	return (dbAllocDmap(bmp, dp, blkno, nblocks));
}
1224 1224
1225 1225
1226 /* 1226 /*
1227 * NAME: dbAllocNear() 1227 * NAME: dbAllocNear()
1228 * 1228 *
1229 * FUNCTION: attempt to allocate a number of contiguous free blocks near 1229 * FUNCTION: attempt to allocate a number of contiguous free blocks near
1230 * a specified block (hint) within a dmap. 1230 * a specified block (hint) within a dmap.
1231 * 1231 *
1232 * starting with the dmap leaf that covers the hint, we'll 1232 * starting with the dmap leaf that covers the hint, we'll
1233 * check the next four contiguous leaves for sufficient free 1233 * check the next four contiguous leaves for sufficient free
1234 * space. if sufficient free space is found, we'll allocate 1234 * space. if sufficient free space is found, we'll allocate
1235 * the desired free space. 1235 * the desired free space.
1236 * 1236 *
1237 * PARAMETERS: 1237 * PARAMETERS:
1238 * bmp - pointer to bmap descriptor 1238 * bmp - pointer to bmap descriptor
1239 * dp - pointer to dmap. 1239 * dp - pointer to dmap.
1240 * blkno - block number to allocate near. 1240 * blkno - block number to allocate near.
1241 * nblocks - actual number of contiguous free blocks desired. 1241 * nblocks - actual number of contiguous free blocks desired.
1242 * l2nb - log2 number of contiguous free blocks desired. 1242 * l2nb - log2 number of contiguous free blocks desired.
1243 * results - on successful return, set to the starting block number 1243 * results - on successful return, set to the starting block number
1244 * of the newly allocated range. 1244 * of the newly allocated range.
1245 * 1245 *
1246 * RETURN VALUES: 1246 * RETURN VALUES:
1247 * 0 - success 1247 * 0 - success
1248 * -ENOSPC - insufficient disk resources 1248 * -ENOSPC - insufficient disk resources
1249 * -EIO - i/o error 1249 * -EIO - i/o error
1250 * 1250 *
1251 * serialization: IREAD_LOCK(ipbmap) held on entry/exit; 1251 * serialization: IREAD_LOCK(ipbmap) held on entry/exit;
1252 */ 1252 */
1253 static int 1253 static int
1254 dbAllocNear(struct bmap * bmp, 1254 dbAllocNear(struct bmap * bmp,
1255 struct dmap * dp, s64 blkno, int nblocks, int l2nb, s64 * results) 1255 struct dmap * dp, s64 blkno, int nblocks, int l2nb, s64 * results)
1256 { 1256 {
1257 int word, lword, rc; 1257 int word, lword, rc;
1258 s8 *leaf; 1258 s8 *leaf;
1259 1259
1260 if (dp->tree.leafidx != cpu_to_le32(LEAFIND)) { 1260 if (dp->tree.leafidx != cpu_to_le32(LEAFIND)) {
1261 jfs_error(bmp->db_ipbmap->i_sb, 1261 jfs_error(bmp->db_ipbmap->i_sb,
1262 "dbAllocNear: Corrupt dmap page"); 1262 "dbAllocNear: Corrupt dmap page");
1263 return -EIO; 1263 return -EIO;
1264 } 1264 }
1265 1265
1266 leaf = dp->tree.stree + le32_to_cpu(dp->tree.leafidx); 1266 leaf = dp->tree.stree + le32_to_cpu(dp->tree.leafidx);
1267 1267
1268 /* determine the word within the dmap that holds the hint 1268 /* determine the word within the dmap that holds the hint
1269 * (i.e. blkno). also, determine the last word in the dmap 1269 * (i.e. blkno). also, determine the last word in the dmap
1270 * that we'll include in our examination. 1270 * that we'll include in our examination.
1271 */ 1271 */
1272 word = (blkno & (BPERDMAP - 1)) >> L2DBWORD; 1272 word = (blkno & (BPERDMAP - 1)) >> L2DBWORD;
1273 lword = min(word + 4, LPERDMAP); 1273 lword = min(word + 4, LPERDMAP);
1274 1274
1275 /* examine the leaves for sufficient free space. 1275 /* examine the leaves for sufficient free space.
1276 */ 1276 */
1277 for (; word < lword; word++) { 1277 for (; word < lword; word++) {
1278 /* does the leaf describe sufficient free space ? 1278 /* does the leaf describe sufficient free space ?
1279 */ 1279 */
1280 if (leaf[word] < l2nb) 1280 if (leaf[word] < l2nb)
1281 continue; 1281 continue;
1282 1282
1283 /* determine the block number within the file system 1283 /* determine the block number within the file system
1284 * of the first block described by this dmap word. 1284 * of the first block described by this dmap word.
1285 */ 1285 */
1286 blkno = le64_to_cpu(dp->start) + (word << L2DBWORD); 1286 blkno = le64_to_cpu(dp->start) + (word << L2DBWORD);
1287 1287
1288 /* if not all bits of the dmap word are free, get the 1288 /* if not all bits of the dmap word are free, get the
1289 * starting bit number within the dmap word of the required 1289 * starting bit number within the dmap word of the required
1290 * string of free bits and adjust the block number with the 1290 * string of free bits and adjust the block number with the
1291 * value. 1291 * value.
1292 */ 1292 */
1293 if (leaf[word] < BUDMIN) 1293 if (leaf[word] < BUDMIN)
1294 blkno += 1294 blkno +=
1295 dbFindBits(le32_to_cpu(dp->wmap[word]), l2nb); 1295 dbFindBits(le32_to_cpu(dp->wmap[word]), l2nb);
1296 1296
1297 /* allocate the blocks. 1297 /* allocate the blocks.
1298 */ 1298 */
1299 if ((rc = dbAllocDmap(bmp, dp, blkno, nblocks)) == 0) 1299 if ((rc = dbAllocDmap(bmp, dp, blkno, nblocks)) == 0)
1300 *results = blkno; 1300 *results = blkno;
1301 1301
1302 return (rc); 1302 return (rc);
1303 } 1303 }
1304 1304
1305 return -ENOSPC; 1305 return -ENOSPC;
1306 } 1306 }
1307 1307
1308 1308
/*
 * NAME:	dbAllocAG()
 *
 * FUNCTION:	attempt to allocate the specified number of contiguous
 *		free blocks within the specified allocation group.
 *
 *		unless the allocation group size is equal to the number
 *		of blocks per dmap, the dmap control pages will be used to
 *		find the required free space, if available.  we start the
 *		search at the highest dmap control page level which
 *		distinctly describes the allocation group's free space
 *		(i.e. the highest level at which the allocation group's
 *		free space is not mixed in with that of any other group).
 *		in addition, we start the search within this level at a
 *		height of the dmapctl dmtree at which the nodes distinctly
 *		describe the allocation group's free space.  at this height,
 *		the allocation group's free space may be represented by 1
 *		or two sub-trees, depending on the allocation group size.
 *		we search the top nodes of these subtrees left to right for
 *		sufficient free space.  if sufficient free space is found,
 *		the subtree is searched to find the leftmost leaf that
 *		has free space.  once we have made it to the leaf, we
 *		move the search to the next lower level dmap control page
 *		corresponding to this leaf.  we continue down the dmap control
 *		pages until we find the dmap that contains or starts the
 *		sufficient free space and we allocate at this dmap.
 *
 *		if the allocation group size is equal to the dmap size,
 *		we'll start at the dmap corresponding to the allocation
 *		group and attempt the allocation at this level.
 *
 *		the dmap control page search is also not performed if the
 *		allocation group is completely free and we go to the first
 *		dmap of the allocation group to do the allocation.  this is
 *		done because the allocation group may be part (not the first
 *		part) of a larger binary buddy system, causing the dmap
 *		control pages to indicate no free space (NOFREE) within
 *		the allocation group.
 *
 * PARAMETERS:
 *	bmp	-  pointer to bmap descriptor
 *	agno	-  allocation group number.
 *	nblocks	-  actual number of contiguous free blocks desired.
 *	l2nb	-  log2 number of contiguous free blocks desired.
 *	results	-  on successful return, set to the starting block number
 *		   of the newly allocated range.
 *
 * RETURN VALUES:
 *	0	- success
 *	-ENOSPC	- insufficient disk resources
 *	-EIO	- i/o error
 *
 * note: IWRITE_LOCK(ipbmap) held on entry/exit;
 */
static int
dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results)
{
	struct metapage *mp;
	struct dmapctl *dcp;
	int rc, ti, i, k, m, n, agperlev;
	s64 blkno, lblkno;
	int budmin;

	/* allocation request should not be for more than the
	 * allocation group size.
	 */
	if (l2nb > bmp->db_agl2size) {
		jfs_error(bmp->db_ipbmap->i_sb,
			  "dbAllocAG: allocation request is larger than the "
			  "allocation group size");
		return -EIO;
	}

	/* determine the starting block number of the allocation
	 * group.
	 */
	blkno = (s64) agno << bmp->db_agl2size;

	/* check if the allocation group size is the minimum allocation
	 * group size or if the allocation group is completely free. if
	 * the allocation group size is the minimum size of BPERDMAP (i.e.
	 * 1 dmap), there is no need to search the dmap control page (below)
	 * that fully describes the allocation group since the allocation
	 * group is already fully described by a dmap.  in this case, we
	 * just call dbAllocCtl() to search the dmap tree and allocate the
	 * required space if available.
	 *
	 * if the allocation group is completely free, dbAllocCtl() is
	 * also called to allocate the required space.  this is done for
	 * two reasons.  first, it makes no sense searching the dmap control
	 * pages for free space when we know that free space exists.  second,
	 * the dmap control pages may indicate that the allocation group
	 * has no free space if the allocation group is part (not the first
	 * part) of a larger binary buddy system.
	 */
	if (bmp->db_agsize == BPERDMAP
	    || bmp->db_agfree[agno] == bmp->db_agsize) {
		rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results);
		if ((rc == -ENOSPC) &&
		    (bmp->db_agfree[agno] == bmp->db_agsize)) {
			/* a completely free AG must be able to satisfy the
			 * request; -ENOSPC here means the map is corrupt.
			 */
			printk(KERN_ERR "blkno = %Lx, blocks = %Lx\n",
			       (unsigned long long) blkno,
			       (unsigned long long) nblocks);
			jfs_error(bmp->db_ipbmap->i_sb,
				  "dbAllocAG: dbAllocCtl failed in free AG");
		}
		return (rc);
	}

	/* the buffer for the dmap control page that fully describes the
	 * allocation group.
	 */
	lblkno = BLKTOCTL(blkno, bmp->db_l2nbperpage, bmp->db_aglevel);
	mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
	if (mp == NULL)
		return -EIO;
	dcp = (struct dmapctl *) mp->data;
	budmin = dcp->budmin;

	/* sanity check the on-disk leaf index of the dmapctl page. */
	if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) {
		jfs_error(bmp->db_ipbmap->i_sb,
			  "dbAllocAG: Corrupt dmapctl page");
		release_metapage(mp);
		return -EIO;
	}

	/* search the subtree(s) of the dmap control page that describes
	 * the allocation group, looking for sufficient free space.  to begin,
	 * determine how many allocation groups are represented in a dmap
	 * control page at the control page level (i.e. L0, L1, L2) that
	 * fully describes an allocation group. next, determine the starting
	 * tree index of this allocation group within the control page.
	 */
	agperlev =
	    (1 << (L2LPERCTL - (bmp->db_agheigth << 1))) / bmp->db_agwidth;
	ti = bmp->db_agstart + bmp->db_agwidth * (agno & (agperlev - 1));

	/* dmap control page trees fan-out by 4 and a single allocation
	 * group may be described by 1 or 2 subtrees within the ag level
	 * dmap control page, depending upon the ag size. examine the ag's
	 * subtrees for sufficient free space, starting with the leftmost
	 * subtree.
	 */
	for (i = 0; i < bmp->db_agwidth; i++, ti++) {
		/* is there sufficient free space ?
		 */
		if (l2nb > dcp->stree[ti])
			continue;

		/* sufficient free space found in a subtree. now search down
		 * the subtree to find the leftmost leaf that describes this
		 * free space.
		 */
		for (k = bmp->db_agheigth; k > 0; k--) {
			/* visit the 4 children of node ti; descend into the
			 * leftmost child with enough free space.
			 */
			for (n = 0, m = (ti << 2) + 1; n < 4; n++) {
				if (l2nb <= dcp->stree[m + n]) {
					ti = m + n;
					break;
				}
			}
			if (n == 4) {
				/* the parent claimed free space that no
				 * child provides: tree is inconsistent.
				 */
				jfs_error(bmp->db_ipbmap->i_sb,
					  "dbAllocAG: failed descending stree");
				release_metapage(mp);
				return -EIO;
			}
		}

		/* determine the block number within the file system
		 * that corresponds to this leaf.
		 */
		if (bmp->db_aglevel == 2)
			blkno = 0;
		else if (bmp->db_aglevel == 1)
			blkno &= ~(MAXL1SIZE - 1);
		else		/* bmp->db_aglevel == 0 */
			blkno &= ~(MAXL0SIZE - 1);

		blkno +=
		    ((s64) (ti - le32_to_cpu(dcp->leafidx))) << budmin;

		/* release the buffer in preparation for going down
		 * the next level of dmap control pages.
		 */
		release_metapage(mp);

		/* check if we need to continue to search down the lower
		 * level dmap control pages.  we need to if the number of
		 * blocks required is less than maximum number of blocks
		 * described at the next lower level.
		 */
		if (l2nb < budmin) {

			/* search the lower level dmap control pages to get
			 * the starting block number of the dmap that
			 * contains or starts off the free space.
			 */
			if ((rc =
			     dbFindCtl(bmp, l2nb, bmp->db_aglevel - 1,
				       &blkno))) {
				if (rc == -ENOSPC) {
					/* the upper level promised space the
					 * lower level cannot deliver.
					 */
					jfs_error(bmp->db_ipbmap->i_sb,
						  "dbAllocAG: control page "
						  "inconsistent");
					return -EIO;
				}
				return (rc);
			}
		}

		/* allocate the blocks.
		 */
		rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results);
		if (rc == -ENOSPC) {
			jfs_error(bmp->db_ipbmap->i_sb,
				  "dbAllocAG: unable to allocate blocks");
			rc = -EIO;
		}
		return (rc);
	}

	/* no space in the allocation group.  release the buffer and
	 * return -ENOSPC.
	 */
	release_metapage(mp);

	return -ENOSPC;
}
1537 1537
1538 1538
1539 /* 1539 /*
1540 * NAME: dbAllocAny() 1540 * NAME: dbAllocAny()
1541 * 1541 *
1542 * FUNCTION: attempt to allocate the specified number of contiguous 1542 * FUNCTION: attempt to allocate the specified number of contiguous
1543 * free blocks anywhere in the file system. 1543 * free blocks anywhere in the file system.
1544 * 1544 *
1545 * dbAllocAny() attempts to find the sufficient free space by 1545 * dbAllocAny() attempts to find the sufficient free space by
1546 * searching down the dmap control pages, starting with the 1546 * searching down the dmap control pages, starting with the
1547 * highest level (i.e. L0, L1, L2) control page. if free space 1547 * highest level (i.e. L0, L1, L2) control page. if free space
1548 * large enough to satisfy the desired free space is found, the 1548 * large enough to satisfy the desired free space is found, the
1549 * desired free space is allocated. 1549 * desired free space is allocated.
1550 * 1550 *
1551 * PARAMETERS: 1551 * PARAMETERS:
1552 * bmp - pointer to bmap descriptor 1552 * bmp - pointer to bmap descriptor
1553 * nblocks - actual number of contiguous free blocks desired. 1553 * nblocks - actual number of contiguous free blocks desired.
1554 * l2nb - log2 number of contiguous free blocks desired. 1554 * l2nb - log2 number of contiguous free blocks desired.
1555 * results - on successful return, set to the starting block number 1555 * results - on successful return, set to the starting block number
1556 * of the newly allocated range. 1556 * of the newly allocated range.
1557 * 1557 *
1558 * RETURN VALUES: 1558 * RETURN VALUES:
1559 * 0 - success 1559 * 0 - success
1560 * -ENOSPC - insufficient disk resources 1560 * -ENOSPC - insufficient disk resources
1561 * -EIO - i/o error 1561 * -EIO - i/o error
1562 * 1562 *
1563 * serialization: IWRITE_LOCK(ipbmap) held on entry/exit; 1563 * serialization: IWRITE_LOCK(ipbmap) held on entry/exit;
1564 */ 1564 */
1565 static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results) 1565 static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results)
1566 { 1566 {
1567 int rc; 1567 int rc;
1568 s64 blkno = 0; 1568 s64 blkno = 0;
1569 1569
1570 /* starting with the top level dmap control page, search 1570 /* starting with the top level dmap control page, search
1571 * down the dmap control levels for sufficient free space. 1571 * down the dmap control levels for sufficient free space.
1572 * if free space is found, dbFindCtl() returns the starting 1572 * if free space is found, dbFindCtl() returns the starting
1573 * block number of the dmap that contains or starts off the 1573 * block number of the dmap that contains or starts off the
1574 * range of free space. 1574 * range of free space.
1575 */ 1575 */
1576 if ((rc = dbFindCtl(bmp, l2nb, bmp->db_maxlevel, &blkno))) 1576 if ((rc = dbFindCtl(bmp, l2nb, bmp->db_maxlevel, &blkno)))
1577 return (rc); 1577 return (rc);
1578 1578
1579 /* allocate the blocks. 1579 /* allocate the blocks.
1580 */ 1580 */
1581 rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results); 1581 rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results);
1582 if (rc == -ENOSPC) { 1582 if (rc == -ENOSPC) {
1583 jfs_error(bmp->db_ipbmap->i_sb, 1583 jfs_error(bmp->db_ipbmap->i_sb,
1584 "dbAllocAny: unable to allocate blocks"); 1584 "dbAllocAny: unable to allocate blocks");
1585 return -EIO; 1585 return -EIO;
1586 } 1586 }
1587 return (rc); 1587 return (rc);
1588 } 1588 }
1589 1589
1590 1590
1591 /* 1591 /*
1592 * NAME: dbFindCtl() 1592 * NAME: dbFindCtl()
1593 * 1593 *
1594 * FUNCTION: starting at a specified dmap control page level and block 1594 * FUNCTION: starting at a specified dmap control page level and block
1595 * number, search down the dmap control levels for a range of 1595 * number, search down the dmap control levels for a range of
1596 * contiguous free blocks large enough to satisfy an allocation 1596 * contiguous free blocks large enough to satisfy an allocation
1597 * request for the specified number of free blocks. 1597 * request for the specified number of free blocks.
1598 * 1598 *
1599 * if sufficient contiguous free blocks are found, this routine 1599 * if sufficient contiguous free blocks are found, this routine
1600 * returns the starting block number within a dmap page that 1600 * returns the starting block number within a dmap page that
1601 * contains or starts a range of contiqious free blocks that 1601 * contains or starts a range of contiqious free blocks that
1602 * is sufficient in size. 1602 * is sufficient in size.
1603 * 1603 *
1604 * PARAMETERS: 1604 * PARAMETERS:
1605 * bmp - pointer to bmap descriptor 1605 * bmp - pointer to bmap descriptor
1606 * level - starting dmap control page level. 1606 * level - starting dmap control page level.
1607 * l2nb - log2 number of contiguous free blocks desired. 1607 * l2nb - log2 number of contiguous free blocks desired.
1608 * *blkno - on entry, starting block number for conducting the search. 1608 * *blkno - on entry, starting block number for conducting the search.
1609 * on successful return, the first block within a dmap page 1609 * on successful return, the first block within a dmap page
1610 * that contains or starts a range of contiguous free blocks. 1610 * that contains or starts a range of contiguous free blocks.
1611 * 1611 *
1612 * RETURN VALUES: 1612 * RETURN VALUES:
1613 * 0 - success 1613 * 0 - success
1614 * -ENOSPC - insufficient disk resources 1614 * -ENOSPC - insufficient disk resources
1615 * -EIO - i/o error 1615 * -EIO - i/o error
1616 * 1616 *
1617 * serialization: IWRITE_LOCK(ipbmap) held on entry/exit; 1617 * serialization: IWRITE_LOCK(ipbmap) held on entry/exit;
1618 */ 1618 */
static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno)
{
	int rc, leafidx, lev;
	s64 b, lblkno;
	struct dmapctl *dcp;
	int budmin;
	struct metapage *mp;

	/* starting at the specified dmap control page level and block
	 * number, search down the dmap control levels for the starting
	 * block number of a dmap page that contains or starts off
	 * sufficient free blocks.
	 */
	for (lev = level, b = *blkno; lev >= 0; lev--) {
		/* get the buffer of the dmap control page for the block
		 * number and level (i.e. L0, L1, L2).
		 */
		lblkno = BLKTOCTL(b, bmp->db_l2nbperpage, lev);
		mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
		if (mp == NULL)
			return -EIO;
		dcp = (struct dmapctl *) mp->data;
		/* budmin is the log2 of the number of blocks covered by
		 * one leaf of this control page's tree; it is used below
		 * to turn a leaf index into a block offset.
		 */
		budmin = dcp->budmin;

		if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) {
			jfs_error(bmp->db_ipbmap->i_sb,
				  "dbFindCtl: Corrupt dmapctl page");
			release_metapage(mp);
			return -EIO;
		}

		/* search the tree within the dmap control page for
		 * sufficient free space.  if sufficient free space is
		 * found, dbFindLeaf() returns the index of the leaf at
		 * which free space was found.
		 */
		rc = dbFindLeaf((dmtree_t *) dcp, l2nb, &leafidx);

		/* release the buffer.
		 */
		release_metapage(mp);

		/* space found ?
		 */
		if (rc) {
			/* free space must be found at the starting level
			 * (the caller already checked the summary info);
			 * failing at a lower level means the upper level
			 * pages over-reported free space.
			 */
			if (lev != level) {
				jfs_error(bmp->db_ipbmap->i_sb,
					  "dbFindCtl: dmap inconsistent");
				return -EIO;
			}
			return -ENOSPC;
		}

		/* adjust the block number to reflect the location within
		 * the dmap control page (i.e. the leaf) at which free
		 * space was found.
		 */
		b += (((s64) leafidx) << budmin);

		/* we stop the search at this dmap control page level if
		 * the number of blocks required is greater than or equal
		 * to the maximum number of blocks described at the next
		 * (lower) level.
		 */
		if (l2nb >= budmin)
			break;
	}

	*blkno = b;
	return (0);
}
1690 1690
1691 1691
1692 /* 1692 /*
1693 * NAME: dbAllocCtl() 1693 * NAME: dbAllocCtl()
1694 * 1694 *
1695 * FUNCTION: attempt to allocate a specified number of contiguous 1695 * FUNCTION: attempt to allocate a specified number of contiguous
1696 * blocks starting within a specific dmap. 1696 * blocks starting within a specific dmap.
1697 * 1697 *
1698 * this routine is called by higher level routines that search 1698 * this routine is called by higher level routines that search
1699 * the dmap control pages above the actual dmaps for contiguous 1699 * the dmap control pages above the actual dmaps for contiguous
1700 * free space. the result of successful searches by these 1700 * free space. the result of successful searches by these
1701 * routines are the starting block numbers within dmaps, with 1701 * routines are the starting block numbers within dmaps, with
1702 * the dmaps themselves containing the desired contiguous free 1702 * the dmaps themselves containing the desired contiguous free
1703 * space or starting a contiguous free space of desired size 1703 * space or starting a contiguous free space of desired size
1704 * that is made up of the blocks of one or more dmaps. these 1704 * that is made up of the blocks of one or more dmaps. these
1705 * calls should not fail due to insufficent resources. 1705 * calls should not fail due to insufficent resources.
1706 * 1706 *
1707 * this routine is called in some cases where it is not known 1707 * this routine is called in some cases where it is not known
1708 * whether it will fail due to insufficient resources. more 1708 * whether it will fail due to insufficient resources. more
1709 * specifically, this occurs when allocating from an allocation 1709 * specifically, this occurs when allocating from an allocation
1710 * group whose size is equal to the number of blocks per dmap. 1710 * group whose size is equal to the number of blocks per dmap.
1711 * in this case, the dmap control pages are not examined prior 1711 * in this case, the dmap control pages are not examined prior
1712 * to calling this routine (to save pathlength) and the call 1712 * to calling this routine (to save pathlength) and the call
1713 * might fail. 1713 * might fail.
1714 * 1714 *
1715 * for a request size that fits within a dmap, this routine relies 1715 * for a request size that fits within a dmap, this routine relies
1716 * upon the dmap's dmtree to find the requested contiguous free 1716 * upon the dmap's dmtree to find the requested contiguous free
1717 * space. for request sizes that are larger than a dmap, the 1717 * space. for request sizes that are larger than a dmap, the
1718 * requested free space will start at the first block of the 1718 * requested free space will start at the first block of the
1719 * first dmap (i.e. blkno). 1719 * first dmap (i.e. blkno).
1720 * 1720 *
1721 * PARAMETERS: 1721 * PARAMETERS:
1722 * bmp - pointer to bmap descriptor 1722 * bmp - pointer to bmap descriptor
1723 * nblocks - actual number of contiguous free blocks to allocate. 1723 * nblocks - actual number of contiguous free blocks to allocate.
1724 * l2nb - log2 number of contiguous free blocks to allocate. 1724 * l2nb - log2 number of contiguous free blocks to allocate.
1725 * blkno - starting block number of the dmap to start the allocation 1725 * blkno - starting block number of the dmap to start the allocation
1726 * from. 1726 * from.
1727 * results - on successful return, set to the starting block number 1727 * results - on successful return, set to the starting block number
1728 * of the newly allocated range. 1728 * of the newly allocated range.
1729 * 1729 *
1730 * RETURN VALUES: 1730 * RETURN VALUES:
1731 * 0 - success 1731 * 0 - success
1732 * -ENOSPC - insufficient disk resources 1732 * -ENOSPC - insufficient disk resources
1733 * -EIO - i/o error 1733 * -EIO - i/o error
1734 * 1734 *
1735 * serialization: IWRITE_LOCK(ipbmap) held on entry/exit; 1735 * serialization: IWRITE_LOCK(ipbmap) held on entry/exit;
1736 */ 1736 */
static int
dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results)
{
	int rc, nb;
	s64 b, lblkno, n;
	struct metapage *mp;
	struct dmap *dp;

	/* check if the allocation request is confined to a single dmap.
	 */
	if (l2nb <= L2BPERDMAP) {
		/* get the buffer for the dmap.
		 */
		lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage);
		mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
		if (mp == NULL)
			return -EIO;
		dp = (struct dmap *) mp->data;

		/* try to allocate the blocks.
		 */
		rc = dbAllocDmapLev(bmp, dp, (int) nblocks, l2nb, results);
		if (rc == 0)
			mark_metapage_dirty(mp);

		release_metapage(mp);

		return (rc);
	}

	/* allocation request involving multiple dmaps. it must start on
	 * a dmap boundary.
	 */
	assert((blkno & (BPERDMAP - 1)) == 0);

	/* allocate the blocks dmap by dmap.
	 */
	for (n = nblocks, b = blkno; n > 0; n -= nb, b += nb) {
		/* get the buffer for the dmap.
		 */
		lblkno = BLKTODMAP(b, bmp->db_l2nbperpage);
		mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
		if (mp == NULL) {
			rc = -EIO;
			goto backout;
		}
		dp = (struct dmap *) mp->data;

		/* the dmap better be all free.
		 * (a dmap tree root of L2BPERDMAP means every block in
		 * the dmap is free; the control page search guaranteed
		 * this, so anything else is corruption.)
		 */
		if (dp->tree.stree[ROOT] != L2BPERDMAP) {
			release_metapage(mp);
			jfs_error(bmp->db_ipbmap->i_sb,
				  "dbAllocCtl: the dmap is not all free");
			rc = -EIO;
			goto backout;
		}

		/* determine how many blocks to allocate from this dmap.
		 */
		nb = min(n, (s64)BPERDMAP);

		/* allocate the blocks from the dmap.
		 */
		if ((rc = dbAllocDmap(bmp, dp, b, nb))) {
			release_metapage(mp);
			goto backout;
		}

		/* write the buffer.
		 */
		write_metapage(mp);
	}

	/* set the results (starting block number) and return.
	 */
	*results = blkno;
	return (0);

	/* something failed in handling an allocation request involving
	 * multiple dmaps.  we'll try to clean up by backing out any
	 * allocation that has already happened for this request.  if
	 * we fail in backing out the allocation, we'll mark the file
	 * system to indicate that blocks have been leaked.
	 */
      backout:

	/* try to backout the allocations dmap by dmap.
	 * (nblocks - n) is the count of blocks successfully allocated
	 * before the failure; every dmap allocated so far was filled
	 * completely, so each one backs out exactly BPERDMAP blocks.
	 */
	for (n = nblocks - n, b = blkno; n > 0;
	     n -= BPERDMAP, b += BPERDMAP) {
		/* get the buffer for this dmap.
		 */
		lblkno = BLKTODMAP(b, bmp->db_l2nbperpage);
		mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
		if (mp == NULL) {
			/* could not back out.  mark the file system
			 * to indicate that we have leaked blocks.
			 */
			jfs_error(bmp->db_ipbmap->i_sb,
				  "dbAllocCtl: I/O Error: Block Leakage.");
			continue;
		}
		dp = (struct dmap *) mp->data;

		/* free the blocks is this dmap.
		 */
		if (dbFreeDmap(bmp, dp, b, BPERDMAP)) {
			/* could not back out.  mark the file system
			 * to indicate that we have leaked blocks.
			 */
			release_metapage(mp);
			jfs_error(bmp->db_ipbmap->i_sb,
				  "dbAllocCtl: Block Leakage.");
			continue;
		}

		/* write the buffer.
		 */
		write_metapage(mp);
	}

	/* return the original failure; the backout is best-effort. */
	return (rc);
}
1861 1861
1862 1862
1863 /* 1863 /*
1864 * NAME: dbAllocDmapLev() 1864 * NAME: dbAllocDmapLev()
1865 * 1865 *
1866 * FUNCTION: attempt to allocate a specified number of contiguous blocks 1866 * FUNCTION: attempt to allocate a specified number of contiguous blocks
1867 * from a specified dmap. 1867 * from a specified dmap.
1868 * 1868 *
1869 * this routine checks if the contiguous blocks are available. 1869 * this routine checks if the contiguous blocks are available.
1870 * if so, nblocks of blocks are allocated; otherwise, ENOSPC is 1870 * if so, nblocks of blocks are allocated; otherwise, ENOSPC is
1871 * returned. 1871 * returned.
1872 * 1872 *
1873 * PARAMETERS: 1873 * PARAMETERS:
1874 * mp - pointer to bmap descriptor 1874 * mp - pointer to bmap descriptor
1875 * dp - pointer to dmap to attempt to allocate blocks from. 1875 * dp - pointer to dmap to attempt to allocate blocks from.
1876 * l2nb - log2 number of contiguous block desired. 1876 * l2nb - log2 number of contiguous block desired.
1877 * nblocks - actual number of contiguous block desired. 1877 * nblocks - actual number of contiguous block desired.
1878 * results - on successful return, set to the starting block number 1878 * results - on successful return, set to the starting block number
1879 * of the newly allocated range. 1879 * of the newly allocated range.
1880 * 1880 *
1881 * RETURN VALUES: 1881 * RETURN VALUES:
1882 * 0 - success 1882 * 0 - success
1883 * -ENOSPC - insufficient disk resources 1883 * -ENOSPC - insufficient disk resources
1884 * -EIO - i/o error 1884 * -EIO - i/o error
1885 * 1885 *
1886 * serialization: IREAD_LOCK(ipbmap), e.g., from dbAlloc(), or 1886 * serialization: IREAD_LOCK(ipbmap), e.g., from dbAlloc(), or
1887 * IWRITE_LOCK(ipbmap), e.g., dbAllocCtl(), held on entry/exit; 1887 * IWRITE_LOCK(ipbmap), e.g., dbAllocCtl(), held on entry/exit;
1888 */ 1888 */
1889 static int 1889 static int
1890 dbAllocDmapLev(struct bmap * bmp, 1890 dbAllocDmapLev(struct bmap * bmp,
1891 struct dmap * dp, int nblocks, int l2nb, s64 * results) 1891 struct dmap * dp, int nblocks, int l2nb, s64 * results)
1892 { 1892 {
1893 s64 blkno; 1893 s64 blkno;
1894 int leafidx, rc; 1894 int leafidx, rc;
1895 1895
1896 /* can't be more than a dmaps worth of blocks */ 1896 /* can't be more than a dmaps worth of blocks */
1897 assert(l2nb <= L2BPERDMAP); 1897 assert(l2nb <= L2BPERDMAP);
1898 1898
1899 /* search the tree within the dmap page for sufficient 1899 /* search the tree within the dmap page for sufficient
1900 * free space. if sufficient free space is found, dbFindLeaf() 1900 * free space. if sufficient free space is found, dbFindLeaf()
1901 * returns the index of the leaf at which free space was found. 1901 * returns the index of the leaf at which free space was found.
1902 */ 1902 */
1903 if (dbFindLeaf((dmtree_t *) & dp->tree, l2nb, &leafidx)) 1903 if (dbFindLeaf((dmtree_t *) & dp->tree, l2nb, &leafidx))
1904 return -ENOSPC; 1904 return -ENOSPC;
1905 1905
1906 /* determine the block number within the file system corresponding 1906 /* determine the block number within the file system corresponding
1907 * to the leaf at which free space was found. 1907 * to the leaf at which free space was found.
1908 */ 1908 */
1909 blkno = le64_to_cpu(dp->start) + (leafidx << L2DBWORD); 1909 blkno = le64_to_cpu(dp->start) + (leafidx << L2DBWORD);
1910 1910
1911 /* if not all bits of the dmap word are free, get the starting 1911 /* if not all bits of the dmap word are free, get the starting
1912 * bit number within the dmap word of the required string of free 1912 * bit number within the dmap word of the required string of free
1913 * bits and adjust the block number with this value. 1913 * bits and adjust the block number with this value.
1914 */ 1914 */
1915 if (dp->tree.stree[leafidx + LEAFIND] < BUDMIN) 1915 if (dp->tree.stree[leafidx + LEAFIND] < BUDMIN)
1916 blkno += dbFindBits(le32_to_cpu(dp->wmap[leafidx]), l2nb); 1916 blkno += dbFindBits(le32_to_cpu(dp->wmap[leafidx]), l2nb);
1917 1917
1918 /* allocate the blocks */ 1918 /* allocate the blocks */
1919 if ((rc = dbAllocDmap(bmp, dp, blkno, nblocks)) == 0) 1919 if ((rc = dbAllocDmap(bmp, dp, blkno, nblocks)) == 0)
1920 *results = blkno; 1920 *results = blkno;
1921 1921
1922 return (rc); 1922 return (rc);
1923 } 1923 }
1924 1924
1925 1925
1926 /* 1926 /*
1927 * NAME: dbAllocDmap() 1927 * NAME: dbAllocDmap()
1928 * 1928 *
1929 * FUNCTION: adjust the disk allocation map to reflect the allocation 1929 * FUNCTION: adjust the disk allocation map to reflect the allocation
1930 * of a specified block range within a dmap. 1930 * of a specified block range within a dmap.
1931 * 1931 *
1932 * this routine allocates the specified blocks from the dmap 1932 * this routine allocates the specified blocks from the dmap
1933 * through a call to dbAllocBits(). if the allocation of the 1933 * through a call to dbAllocBits(). if the allocation of the
1934 * block range causes the maximum string of free blocks within 1934 * block range causes the maximum string of free blocks within
1935 * the dmap to change (i.e. the value of the root of the dmap's 1935 * the dmap to change (i.e. the value of the root of the dmap's
1936 * dmtree), this routine will cause this change to be reflected 1936 * dmtree), this routine will cause this change to be reflected
1937 * up through the appropriate levels of the dmap control pages 1937 * up through the appropriate levels of the dmap control pages
1938 * by a call to dbAdjCtl() for the L0 dmap control page that 1938 * by a call to dbAdjCtl() for the L0 dmap control page that
1939 * covers this dmap. 1939 * covers this dmap.
1940 * 1940 *
1941 * PARAMETERS: 1941 * PARAMETERS:
1942 * bmp - pointer to bmap descriptor 1942 * bmp - pointer to bmap descriptor
1943 * dp - pointer to dmap to allocate the block range from. 1943 * dp - pointer to dmap to allocate the block range from.
1944 * blkno - starting block number of the block to be allocated. 1944 * blkno - starting block number of the block to be allocated.
1945 * nblocks - number of blocks to be allocated. 1945 * nblocks - number of blocks to be allocated.
1946 * 1946 *
1947 * RETURN VALUES: 1947 * RETURN VALUES:
1948 * 0 - success 1948 * 0 - success
1949 * -EIO - i/o error 1949 * -EIO - i/o error
1950 * 1950 *
1951 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; 1951 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
1952 */ 1952 */
1953 static int dbAllocDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, 1953 static int dbAllocDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
1954 int nblocks) 1954 int nblocks)
1955 { 1955 {
1956 s8 oldroot; 1956 s8 oldroot;
1957 int rc; 1957 int rc;
1958 1958
1959 /* save the current value of the root (i.e. maximum free string) 1959 /* save the current value of the root (i.e. maximum free string)
1960 * of the dmap tree. 1960 * of the dmap tree.
1961 */ 1961 */
1962 oldroot = dp->tree.stree[ROOT]; 1962 oldroot = dp->tree.stree[ROOT];
1963 1963
1964 /* allocate the specified (blocks) bits */ 1964 /* allocate the specified (blocks) bits */
1965 dbAllocBits(bmp, dp, blkno, nblocks); 1965 dbAllocBits(bmp, dp, blkno, nblocks);
1966 1966
1967 /* if the root has not changed, done. */ 1967 /* if the root has not changed, done. */
1968 if (dp->tree.stree[ROOT] == oldroot) 1968 if (dp->tree.stree[ROOT] == oldroot)
1969 return (0); 1969 return (0);
1970 1970
1971 /* root changed. bubble the change up to the dmap control pages. 1971 /* root changed. bubble the change up to the dmap control pages.
1972 * if the adjustment of the upper level control pages fails, 1972 * if the adjustment of the upper level control pages fails,
1973 * backout the bit allocation (thus making everything consistent). 1973 * backout the bit allocation (thus making everything consistent).
1974 */ 1974 */
1975 if ((rc = dbAdjCtl(bmp, blkno, dp->tree.stree[ROOT], 1, 0))) 1975 if ((rc = dbAdjCtl(bmp, blkno, dp->tree.stree[ROOT], 1, 0)))
1976 dbFreeBits(bmp, dp, blkno, nblocks); 1976 dbFreeBits(bmp, dp, blkno, nblocks);
1977 1977
1978 return (rc); 1978 return (rc);
1979 } 1979 }
1980 1980
1981 1981
1982 /* 1982 /*
1983 * NAME: dbFreeDmap() 1983 * NAME: dbFreeDmap()
1984 * 1984 *
1985 * FUNCTION: adjust the disk allocation map to reflect the allocation 1985 * FUNCTION: adjust the disk allocation map to reflect the allocation
1986 * of a specified block range within a dmap. 1986 * of a specified block range within a dmap.
1987 * 1987 *
1988 * this routine frees the specified blocks from the dmap through 1988 * this routine frees the specified blocks from the dmap through
1989 * a call to dbFreeBits(). if the deallocation of the block range 1989 * a call to dbFreeBits(). if the deallocation of the block range
1990 * causes the maximum string of free blocks within the dmap to 1990 * causes the maximum string of free blocks within the dmap to
1991 * change (i.e. the value of the root of the dmap's dmtree), this 1991 * change (i.e. the value of the root of the dmap's dmtree), this
1992 * routine will cause this change to be reflected up through the 1992 * routine will cause this change to be reflected up through the
1993 * appropriate levels of the dmap control pages by a call to 1993 * appropriate levels of the dmap control pages by a call to
1994 * dbAdjCtl() for the L0 dmap control page that covers this dmap. 1994 * dbAdjCtl() for the L0 dmap control page that covers this dmap.
1995 * 1995 *
1996 * PARAMETERS: 1996 * PARAMETERS:
1997 * bmp - pointer to bmap descriptor 1997 * bmp - pointer to bmap descriptor
1998 * dp - pointer to dmap to free the block range from. 1998 * dp - pointer to dmap to free the block range from.
1999 * blkno - starting block number of the block to be freed. 1999 * blkno - starting block number of the block to be freed.
2000 * nblocks - number of blocks to be freed. 2000 * nblocks - number of blocks to be freed.
2001 * 2001 *
2002 * RETURN VALUES: 2002 * RETURN VALUES:
2003 * 0 - success 2003 * 0 - success
2004 * -EIO - i/o error 2004 * -EIO - i/o error
2005 * 2005 *
2006 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; 2006 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
2007 */ 2007 */
static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
		      int nblocks)
{
	s8 oldroot;
	int rc = 0, word;

	/* save the current value of the root (i.e. maximum free string)
	 * of the dmap tree.
	 */
	oldroot = dp->tree.stree[ROOT];

	/* free the specified (blocks) bits */
	rc = dbFreeBits(bmp, dp, blkno, nblocks);

	/* if error or the root has not changed, done. */
	if (rc || (dp->tree.stree[ROOT] == oldroot))
		return (rc);

	/* root changed. bubble the change up to the dmap control pages.
	 * if the adjustment of the upper level control pages fails,
	 * backout the deallocation.
	 */
	if ((rc = dbAdjCtl(bmp, blkno, dp->tree.stree[ROOT], 0, 0))) {
		/* leaf (dmap word) index covering the start of the
		 * freed range.
		 */
		word = (blkno & (BPERDMAP - 1)) >> L2DBWORD;

		/* as part of backing out the deallocation, we will have
		 * to back split the dmap tree if the deallocation caused
		 * the freed blocks to become part of a larger binary buddy
		 * system.
		 */
		if (dp->tree.stree[word] == NOFREE)
			dbBackSplit((dmtree_t *) & dp->tree, word);

		/* re-allocate the bits to restore the pre-free state */
		dbAllocBits(bmp, dp, blkno, nblocks);
	}

	return (rc);
}
2046 2046
2047 2047
2048 /* 2048 /*
2049 * NAME: dbAllocBits() 2049 * NAME: dbAllocBits()
2050 * 2050 *
2051 * FUNCTION: allocate a specified block range from a dmap. 2051 * FUNCTION: allocate a specified block range from a dmap.
2052 * 2052 *
2053 * this routine updates the dmap to reflect the working 2053 * this routine updates the dmap to reflect the working
2054 * state allocation of the specified block range. it directly 2054 * state allocation of the specified block range. it directly
2055 * updates the bits of the working map and causes the adjustment 2055 * updates the bits of the working map and causes the adjustment
2056 * of the binary buddy system described by the dmap's dmtree 2056 * of the binary buddy system described by the dmap's dmtree
2057 * leaves to reflect the bits allocated. it also causes the 2057 * leaves to reflect the bits allocated. it also causes the
2058 * dmap's dmtree, as a whole, to reflect the allocated range. 2058 * dmap's dmtree, as a whole, to reflect the allocated range.
2059 * 2059 *
2060 * PARAMETERS: 2060 * PARAMETERS:
2061 * bmp - pointer to bmap descriptor 2061 * bmp - pointer to bmap descriptor
2062 * dp - pointer to dmap to allocate bits from. 2062 * dp - pointer to dmap to allocate bits from.
2063 * blkno - starting block number of the bits to be allocated. 2063 * blkno - starting block number of the bits to be allocated.
2064 * nblocks - number of bits to be allocated. 2064 * nblocks - number of bits to be allocated.
2065 * 2065 *
2066 * RETURN VALUES: none 2066 * RETURN VALUES: none
2067 * 2067 *
2068 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; 2068 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
2069 */ 2069 */
2070 static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno, 2070 static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
2071 int nblocks) 2071 int nblocks)
2072 { 2072 {
2073 int dbitno, word, rembits, nb, nwords, wbitno, nw, agno; 2073 int dbitno, word, rembits, nb, nwords, wbitno, nw, agno;
2074 dmtree_t *tp = (dmtree_t *) & dp->tree; 2074 dmtree_t *tp = (dmtree_t *) & dp->tree;
2075 int size; 2075 int size;
2076 s8 *leaf; 2076 s8 *leaf;
2077 2077
2078 /* pick up a pointer to the leaves of the dmap tree */ 2078 /* pick up a pointer to the leaves of the dmap tree */
2079 leaf = dp->tree.stree + LEAFIND; 2079 leaf = dp->tree.stree + LEAFIND;
2080 2080
2081 /* determine the bit number and word within the dmap of the 2081 /* determine the bit number and word within the dmap of the
2082 * starting block. 2082 * starting block.
2083 */ 2083 */
2084 dbitno = blkno & (BPERDMAP - 1); 2084 dbitno = blkno & (BPERDMAP - 1);
2085 word = dbitno >> L2DBWORD; 2085 word = dbitno >> L2DBWORD;
2086 2086
2087 /* block range better be within the dmap */ 2087 /* block range better be within the dmap */
2088 assert(dbitno + nblocks <= BPERDMAP); 2088 assert(dbitno + nblocks <= BPERDMAP);
2089 2089
2090 /* allocate the bits of the dmap's words corresponding to the block 2090 /* allocate the bits of the dmap's words corresponding to the block
2091 * range. not all bits of the first and last words may be contained 2091 * range. not all bits of the first and last words may be contained
2092 * within the block range. if this is the case, we'll work against 2092 * within the block range. if this is the case, we'll work against
2093 * those words (i.e. partial first and/or last) on an individual basis 2093 * those words (i.e. partial first and/or last) on an individual basis
2094 * (a single pass), allocating the bits of interest by hand and 2094 * (a single pass), allocating the bits of interest by hand and
2095 * updating the leaf corresponding to the dmap word. a single pass 2095 * updating the leaf corresponding to the dmap word. a single pass
2096 * will be used for all dmap words fully contained within the 2096 * will be used for all dmap words fully contained within the
2097 * specified range. within this pass, the bits of all fully contained 2097 * specified range. within this pass, the bits of all fully contained
2098 * dmap words will be marked as free in a single shot and the leaves 2098 * dmap words will be marked as free in a single shot and the leaves
2099 * will be updated. a single leaf may describe the free space of 2099 * will be updated. a single leaf may describe the free space of
2100 * multiple dmap words, so we may update only a subset of the actual 2100 * multiple dmap words, so we may update only a subset of the actual
2101 * leaves corresponding to the dmap words of the block range. 2101 * leaves corresponding to the dmap words of the block range.
2102 */ 2102 */
2103 for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) { 2103 for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) {
2104 /* determine the bit number within the word and 2104 /* determine the bit number within the word and
2105 * the number of bits within the word. 2105 * the number of bits within the word.
2106 */ 2106 */
2107 wbitno = dbitno & (DBWORD - 1); 2107 wbitno = dbitno & (DBWORD - 1);
2108 nb = min(rembits, DBWORD - wbitno); 2108 nb = min(rembits, DBWORD - wbitno);
2109 2109
2110 /* check if only part of a word is to be allocated. 2110 /* check if only part of a word is to be allocated.
2111 */ 2111 */
2112 if (nb < DBWORD) { 2112 if (nb < DBWORD) {
2113 /* allocate (set to 1) the appropriate bits within 2113 /* allocate (set to 1) the appropriate bits within
2114 * this dmap word. 2114 * this dmap word.
2115 */ 2115 */
2116 dp->wmap[word] |= cpu_to_le32(ONES << (DBWORD - nb) 2116 dp->wmap[word] |= cpu_to_le32(ONES << (DBWORD - nb)
2117 >> wbitno); 2117 >> wbitno);
2118 2118
2119 /* update the leaf for this dmap word. in addition 2119 /* update the leaf for this dmap word. in addition
2120 * to setting the leaf value to the binary buddy max 2120 * to setting the leaf value to the binary buddy max
2121 * of the updated dmap word, dbSplit() will split 2121 * of the updated dmap word, dbSplit() will split
2122 * the binary system of the leaves if need be. 2122 * the binary system of the leaves if need be.
2123 */ 2123 */
2124 dbSplit(tp, word, BUDMIN, 2124 dbSplit(tp, word, BUDMIN,
2125 dbMaxBud((u8 *) & dp->wmap[word])); 2125 dbMaxBud((u8 *) & dp->wmap[word]));
2126 2126
2127 word += 1; 2127 word += 1;
2128 } else { 2128 } else {
2129 /* one or more dmap words are fully contained 2129 /* one or more dmap words are fully contained
2130 * within the block range. determine how many 2130 * within the block range. determine how many
2131 * words and allocate (set to 1) the bits of these 2131 * words and allocate (set to 1) the bits of these
2132 * words. 2132 * words.
2133 */ 2133 */
2134 nwords = rembits >> L2DBWORD; 2134 nwords = rembits >> L2DBWORD;
2135 memset(&dp->wmap[word], (int) ONES, nwords * 4); 2135 memset(&dp->wmap[word], (int) ONES, nwords * 4);
2136 2136
2137 /* determine how many bits. 2137 /* determine how many bits.
2138 */ 2138 */
2139 nb = nwords << L2DBWORD; 2139 nb = nwords << L2DBWORD;
2140 2140
2141 /* now update the appropriate leaves to reflect 2141 /* now update the appropriate leaves to reflect
2142 * the allocated words. 2142 * the allocated words.
2143 */ 2143 */
2144 for (; nwords > 0; nwords -= nw) { 2144 for (; nwords > 0; nwords -= nw) {
2145 if (leaf[word] < BUDMIN) { 2145 if (leaf[word] < BUDMIN) {
2146 jfs_error(bmp->db_ipbmap->i_sb, 2146 jfs_error(bmp->db_ipbmap->i_sb,
2147 "dbAllocBits: leaf page " 2147 "dbAllocBits: leaf page "
2148 "corrupt"); 2148 "corrupt");
2149 break; 2149 break;
2150 } 2150 }
2151 2151
2152 /* determine what the leaf value should be 2152 /* determine what the leaf value should be
2153 * updated to as the minimum of the l2 number 2153 * updated to as the minimum of the l2 number
2154 * of bits being allocated and the l2 number 2154 * of bits being allocated and the l2 number
2155 * of bits currently described by this leaf. 2155 * of bits currently described by this leaf.
2156 */ 2156 */
2157 size = min((int)leaf[word], NLSTOL2BSZ(nwords)); 2157 size = min((int)leaf[word], NLSTOL2BSZ(nwords));
2158 2158
2159 /* update the leaf to reflect the allocation. 2159 /* update the leaf to reflect the allocation.
2160 * in addition to setting the leaf value to 2160 * in addition to setting the leaf value to
2161 * NOFREE, dbSplit() will split the binary 2161 * NOFREE, dbSplit() will split the binary
2162 * system of the leaves to reflect the current 2162 * system of the leaves to reflect the current
2163 * allocation (size). 2163 * allocation (size).
2164 */ 2164 */
2165 dbSplit(tp, word, size, NOFREE); 2165 dbSplit(tp, word, size, NOFREE);
2166 2166
2167 /* get the number of dmap words handled */ 2167 /* get the number of dmap words handled */
2168 nw = BUDSIZE(size, BUDMIN); 2168 nw = BUDSIZE(size, BUDMIN);
2169 word += nw; 2169 word += nw;
2170 } 2170 }
2171 } 2171 }
2172 } 2172 }
2173 2173
2174 /* update the free count for this dmap */ 2174 /* update the free count for this dmap */
2175 dp->nfree = cpu_to_le32(le32_to_cpu(dp->nfree) - nblocks); 2175 le32_add_cpu(&dp->nfree, -nblocks);
2176 2176
2177 BMAP_LOCK(bmp); 2177 BMAP_LOCK(bmp);
2178 2178
2179 /* if this allocation group is completely free, 2179 /* if this allocation group is completely free,
2180 * update the maximum allocation group number if this allocation 2180 * update the maximum allocation group number if this allocation
2181 * group is the new max. 2181 * group is the new max.
2182 */ 2182 */
2183 agno = blkno >> bmp->db_agl2size; 2183 agno = blkno >> bmp->db_agl2size;
2184 if (agno > bmp->db_maxag) 2184 if (agno > bmp->db_maxag)
2185 bmp->db_maxag = agno; 2185 bmp->db_maxag = agno;
2186 2186
2187 /* update the free count for the allocation group and map */ 2187 /* update the free count for the allocation group and map */
2188 bmp->db_agfree[agno] -= nblocks; 2188 bmp->db_agfree[agno] -= nblocks;
2189 bmp->db_nfree -= nblocks; 2189 bmp->db_nfree -= nblocks;
2190 2190
2191 BMAP_UNLOCK(bmp); 2191 BMAP_UNLOCK(bmp);
2192 } 2192 }
2193 2193
2194 2194
2195 /* 2195 /*
2196 * NAME: dbFreeBits() 2196 * NAME: dbFreeBits()
2197 * 2197 *
2198 * FUNCTION: free a specified block range from a dmap. 2198 * FUNCTION: free a specified block range from a dmap.
2199 * 2199 *
2200 * this routine updates the dmap to reflect the working 2200 * this routine updates the dmap to reflect the working
2201 * state allocation of the specified block range. it directly 2201 * state allocation of the specified block range. it directly
2202 * updates the bits of the working map and causes the adjustment 2202 * updates the bits of the working map and causes the adjustment
2203 * of the binary buddy system described by the dmap's dmtree 2203 * of the binary buddy system described by the dmap's dmtree
2204 * leaves to reflect the bits freed. it also causes the dmap's 2204 * leaves to reflect the bits freed. it also causes the dmap's
2205 * dmtree, as a whole, to reflect the deallocated range. 2205 * dmtree, as a whole, to reflect the deallocated range.
2206 * 2206 *
2207 * PARAMETERS: 2207 * PARAMETERS:
2208 * bmp - pointer to bmap descriptor 2208 * bmp - pointer to bmap descriptor
2209 * dp - pointer to dmap to free bits from. 2209 * dp - pointer to dmap to free bits from.
2210 * blkno - starting block number of the bits to be freed. 2210 * blkno - starting block number of the bits to be freed.
2211 * nblocks - number of bits to be freed. 2211 * nblocks - number of bits to be freed.
2212 * 2212 *
2213 * RETURN VALUES: 0 for success 2213 * RETURN VALUES: 0 for success
2214 * 2214 *
2215 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; 2215 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
2216 */ 2216 */
2217 static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno, 2217 static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
2218 int nblocks) 2218 int nblocks)
2219 { 2219 {
2220 int dbitno, word, rembits, nb, nwords, wbitno, nw, agno; 2220 int dbitno, word, rembits, nb, nwords, wbitno, nw, agno;
2221 dmtree_t *tp = (dmtree_t *) & dp->tree; 2221 dmtree_t *tp = (dmtree_t *) & dp->tree;
2222 int rc = 0; 2222 int rc = 0;
2223 int size; 2223 int size;
2224 2224
2225 /* determine the bit number and word within the dmap of the 2225 /* determine the bit number and word within the dmap of the
2226 * starting block. 2226 * starting block.
2227 */ 2227 */
2228 dbitno = blkno & (BPERDMAP - 1); 2228 dbitno = blkno & (BPERDMAP - 1);
2229 word = dbitno >> L2DBWORD; 2229 word = dbitno >> L2DBWORD;
2230 2230
2231 /* block range better be within the dmap. 2231 /* block range better be within the dmap.
2232 */ 2232 */
2233 assert(dbitno + nblocks <= BPERDMAP); 2233 assert(dbitno + nblocks <= BPERDMAP);
2234 2234
2235 /* free the bits of the dmaps words corresponding to the block range. 2235 /* free the bits of the dmaps words corresponding to the block range.
2236 * not all bits of the first and last words may be contained within 2236 * not all bits of the first and last words may be contained within
2237 * the block range. if this is the case, we'll work against those 2237 * the block range. if this is the case, we'll work against those
2238 * words (i.e. partial first and/or last) on an individual basis 2238 * words (i.e. partial first and/or last) on an individual basis
2239 * (a single pass), freeing the bits of interest by hand and updating 2239 * (a single pass), freeing the bits of interest by hand and updating
2240 * the leaf corresponding to the dmap word. a single pass will be used 2240 * the leaf corresponding to the dmap word. a single pass will be used
2241 * for all dmap words fully contained within the specified range. 2241 * for all dmap words fully contained within the specified range.
2242 * within this pass, the bits of all fully contained dmap words will 2242 * within this pass, the bits of all fully contained dmap words will
2243 * be marked as free in a single shot and the leaves will be updated. a 2243 * be marked as free in a single shot and the leaves will be updated. a
2244 * single leaf may describe the free space of multiple dmap words, 2244 * single leaf may describe the free space of multiple dmap words,
2245 * so we may update only a subset of the actual leaves corresponding 2245 * so we may update only a subset of the actual leaves corresponding
2246 * to the dmap words of the block range. 2246 * to the dmap words of the block range.
2247 * 2247 *
2248 * dbJoin() is used to update leaf values and will join the binary 2248 * dbJoin() is used to update leaf values and will join the binary
2249 * buddy system of the leaves if the new leaf values indicate this 2249 * buddy system of the leaves if the new leaf values indicate this
2250 * should be done. 2250 * should be done.
2251 */ 2251 */
2252 for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) { 2252 for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) {
2253 /* determine the bit number within the word and 2253 /* determine the bit number within the word and
2254 * the number of bits within the word. 2254 * the number of bits within the word.
2255 */ 2255 */
2256 wbitno = dbitno & (DBWORD - 1); 2256 wbitno = dbitno & (DBWORD - 1);
2257 nb = min(rembits, DBWORD - wbitno); 2257 nb = min(rembits, DBWORD - wbitno);
2258 2258
2259 /* check if only part of a word is to be freed. 2259 /* check if only part of a word is to be freed.
2260 */ 2260 */
2261 if (nb < DBWORD) { 2261 if (nb < DBWORD) {
2262 /* free (zero) the appropriate bits within this 2262 /* free (zero) the appropriate bits within this
2263 * dmap word. 2263 * dmap word.
2264 */ 2264 */
2265 dp->wmap[word] &= 2265 dp->wmap[word] &=
2266 cpu_to_le32(~(ONES << (DBWORD - nb) 2266 cpu_to_le32(~(ONES << (DBWORD - nb)
2267 >> wbitno)); 2267 >> wbitno));
2268 2268
2269 /* update the leaf for this dmap word. 2269 /* update the leaf for this dmap word.
2270 */ 2270 */
2271 rc = dbJoin(tp, word, 2271 rc = dbJoin(tp, word,
2272 dbMaxBud((u8 *) & dp->wmap[word])); 2272 dbMaxBud((u8 *) & dp->wmap[word]));
2273 if (rc) 2273 if (rc)
2274 return rc; 2274 return rc;
2275 2275
2276 word += 1; 2276 word += 1;
2277 } else { 2277 } else {
2278 /* one or more dmap words are fully contained 2278 /* one or more dmap words are fully contained
2279 * within the block range. determine how many 2279 * within the block range. determine how many
2280 * words and free (zero) the bits of these words. 2280 * words and free (zero) the bits of these words.
2281 */ 2281 */
2282 nwords = rembits >> L2DBWORD; 2282 nwords = rembits >> L2DBWORD;
2283 memset(&dp->wmap[word], 0, nwords * 4); 2283 memset(&dp->wmap[word], 0, nwords * 4);
2284 2284
2285 /* determine how many bits. 2285 /* determine how many bits.
2286 */ 2286 */
2287 nb = nwords << L2DBWORD; 2287 nb = nwords << L2DBWORD;
2288 2288
2289 /* now update the appropriate leaves to reflect 2289 /* now update the appropriate leaves to reflect
2290 * the freed words. 2290 * the freed words.
2291 */ 2291 */
2292 for (; nwords > 0; nwords -= nw) { 2292 for (; nwords > 0; nwords -= nw) {
2293 /* determine what the leaf value should be 2293 /* determine what the leaf value should be
2294 * updated to as the minimum of the l2 number 2294 * updated to as the minimum of the l2 number
2295 * of bits being freed and the l2 (max) number 2295 * of bits being freed and the l2 (max) number
2296 * of bits that can be described by this leaf. 2296 * of bits that can be described by this leaf.
2297 */ 2297 */
2298 size = 2298 size =
2299 min(LITOL2BSZ 2299 min(LITOL2BSZ
2300 (word, L2LPERDMAP, BUDMIN), 2300 (word, L2LPERDMAP, BUDMIN),
2301 NLSTOL2BSZ(nwords)); 2301 NLSTOL2BSZ(nwords));
2302 2302
2303 /* update the leaf. 2303 /* update the leaf.
2304 */ 2304 */
2305 rc = dbJoin(tp, word, size); 2305 rc = dbJoin(tp, word, size);
2306 if (rc) 2306 if (rc)
2307 return rc; 2307 return rc;
2308 2308
2309 /* get the number of dmap words handled. 2309 /* get the number of dmap words handled.
2310 */ 2310 */
2311 nw = BUDSIZE(size, BUDMIN); 2311 nw = BUDSIZE(size, BUDMIN);
2312 word += nw; 2312 word += nw;
2313 } 2313 }
2314 } 2314 }
2315 } 2315 }
2316 2316
2317 /* update the free count for this dmap. 2317 /* update the free count for this dmap.
2318 */ 2318 */
2319 dp->nfree = cpu_to_le32(le32_to_cpu(dp->nfree) + nblocks); 2319 le32_add_cpu(&dp->nfree, nblocks);
2320 2320
2321 BMAP_LOCK(bmp); 2321 BMAP_LOCK(bmp);
2322 2322
2323 /* update the free count for the allocation group and 2323 /* update the free count for the allocation group and
2324 * map. 2324 * map.
2325 */ 2325 */
2326 agno = blkno >> bmp->db_agl2size; 2326 agno = blkno >> bmp->db_agl2size;
2327 bmp->db_nfree += nblocks; 2327 bmp->db_nfree += nblocks;
2328 bmp->db_agfree[agno] += nblocks; 2328 bmp->db_agfree[agno] += nblocks;
2329 2329
2330 /* check if this allocation group is not completely free and 2330 /* check if this allocation group is not completely free and
2331 * if it is currently the maximum (rightmost) allocation group. 2331 * if it is currently the maximum (rightmost) allocation group.
2332 * if so, establish the new maximum allocation group number by 2332 * if so, establish the new maximum allocation group number by
2333 * searching left for the first allocation group with allocation. 2333 * searching left for the first allocation group with allocation.
2334 */ 2334 */
2335 if ((bmp->db_agfree[agno] == bmp->db_agsize && agno == bmp->db_maxag) || 2335 if ((bmp->db_agfree[agno] == bmp->db_agsize && agno == bmp->db_maxag) ||
2336 (agno == bmp->db_numag - 1 && 2336 (agno == bmp->db_numag - 1 &&
2337 bmp->db_agfree[agno] == (bmp-> db_mapsize & (BPERDMAP - 1)))) { 2337 bmp->db_agfree[agno] == (bmp-> db_mapsize & (BPERDMAP - 1)))) {
2338 while (bmp->db_maxag > 0) { 2338 while (bmp->db_maxag > 0) {
2339 bmp->db_maxag -= 1; 2339 bmp->db_maxag -= 1;
2340 if (bmp->db_agfree[bmp->db_maxag] != 2340 if (bmp->db_agfree[bmp->db_maxag] !=
2341 bmp->db_agsize) 2341 bmp->db_agsize)
2342 break; 2342 break;
2343 } 2343 }
2344 2344
2345 /* re-establish the allocation group preference if the 2345 /* re-establish the allocation group preference if the
2346 * current preference is right of the maximum allocation 2346 * current preference is right of the maximum allocation
2347 * group. 2347 * group.
2348 */ 2348 */
2349 if (bmp->db_agpref > bmp->db_maxag) 2349 if (bmp->db_agpref > bmp->db_maxag)
2350 bmp->db_agpref = bmp->db_maxag; 2350 bmp->db_agpref = bmp->db_maxag;
2351 } 2351 }
2352 2352
2353 BMAP_UNLOCK(bmp); 2353 BMAP_UNLOCK(bmp);
2354 2354
2355 return 0; 2355 return 0;
2356 } 2356 }
2357 2357
2358 2358
2359 /* 2359 /*
2360 * NAME: dbAdjCtl() 2360 * NAME: dbAdjCtl()
2361 * 2361 *
2362 * FUNCTION: adjust a dmap control page at a specified level to reflect 2362 * FUNCTION: adjust a dmap control page at a specified level to reflect
2363 * the change in a lower level dmap or dmap control page's 2363 * the change in a lower level dmap or dmap control page's
2364 * maximum string of free blocks (i.e. a change in the root 2364 * maximum string of free blocks (i.e. a change in the root
2365 * of the lower level object's dmtree) due to the allocation 2365 * of the lower level object's dmtree) due to the allocation
2366 * or deallocation of a range of blocks with a single dmap. 2366 * or deallocation of a range of blocks with a single dmap.
2367 * 2367 *
2368 * on entry, this routine is provided with the new value of 2368 * on entry, this routine is provided with the new value of
2369 * the lower level dmap or dmap control page root and the 2369 * the lower level dmap or dmap control page root and the
2370 * starting block number of the block range whose allocation 2370 * starting block number of the block range whose allocation
2371 * or deallocation resulted in the root change. this range 2371 * or deallocation resulted in the root change. this range
2372 * is respresented by a single leaf of the current dmapctl 2372 * is respresented by a single leaf of the current dmapctl
2373 * and the leaf will be updated with this value, possibly 2373 * and the leaf will be updated with this value, possibly
2374 * causing a binary buddy system within the leaves to be 2374 * causing a binary buddy system within the leaves to be
2375 * split or joined. the update may also cause the dmapctl's 2375 * split or joined. the update may also cause the dmapctl's
2376 * dmtree to be updated. 2376 * dmtree to be updated.
2377 * 2377 *
2378 * if the adjustment of the dmap control page, itself, causes its 2378 * if the adjustment of the dmap control page, itself, causes its
2379 * root to change, this change will be bubbled up to the next dmap 2379 * root to change, this change will be bubbled up to the next dmap
2380 * control level by a recursive call to this routine, specifying 2380 * control level by a recursive call to this routine, specifying
2381 * the new root value and the next dmap control page level to 2381 * the new root value and the next dmap control page level to
2382 * be adjusted. 2382 * be adjusted.
2383 * PARAMETERS: 2383 * PARAMETERS:
2384 * bmp - pointer to bmap descriptor 2384 * bmp - pointer to bmap descriptor
2385 * blkno - the first block of a block range within a dmap. it is 2385 * blkno - the first block of a block range within a dmap. it is
2386 * the allocation or deallocation of this block range that 2386 * the allocation or deallocation of this block range that
2387 * requires the dmap control page to be adjusted. 2387 * requires the dmap control page to be adjusted.
2388 * newval - the new value of the lower level dmap or dmap control 2388 * newval - the new value of the lower level dmap or dmap control
2389 * page root. 2389 * page root.
2390 * alloc - 'true' if adjustment is due to an allocation. 2390 * alloc - 'true' if adjustment is due to an allocation.
2391 * level - current level of dmap control page (i.e. L0, L1, L2) to 2391 * level - current level of dmap control page (i.e. L0, L1, L2) to
2392 * be adjusted. 2392 * be adjusted.
2393 * 2393 *
2394 * RETURN VALUES: 2394 * RETURN VALUES:
2395 * 0 - success 2395 * 0 - success
2396 * -EIO - i/o error 2396 * -EIO - i/o error
2397 * 2397 *
2398 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; 2398 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
2399 */ 2399 */
2400 static int 2400 static int
2401 dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level) 2401 dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level)
2402 { 2402 {
2403 struct metapage *mp; 2403 struct metapage *mp;
2404 s8 oldroot; 2404 s8 oldroot;
2405 int oldval; 2405 int oldval;
2406 s64 lblkno; 2406 s64 lblkno;
2407 struct dmapctl *dcp; 2407 struct dmapctl *dcp;
2408 int rc, leafno, ti; 2408 int rc, leafno, ti;
2409 2409
2410 /* get the buffer for the dmap control page for the specified 2410 /* get the buffer for the dmap control page for the specified
2411 * block number and control page level. 2411 * block number and control page level.
2412 */ 2412 */
2413 lblkno = BLKTOCTL(blkno, bmp->db_l2nbperpage, level); 2413 lblkno = BLKTOCTL(blkno, bmp->db_l2nbperpage, level);
2414 mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0); 2414 mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
2415 if (mp == NULL) 2415 if (mp == NULL)
2416 return -EIO; 2416 return -EIO;
2417 dcp = (struct dmapctl *) mp->data; 2417 dcp = (struct dmapctl *) mp->data;
2418 2418
2419 if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) { 2419 if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) {
2420 jfs_error(bmp->db_ipbmap->i_sb, 2420 jfs_error(bmp->db_ipbmap->i_sb,
2421 "dbAdjCtl: Corrupt dmapctl page"); 2421 "dbAdjCtl: Corrupt dmapctl page");
2422 release_metapage(mp); 2422 release_metapage(mp);
2423 return -EIO; 2423 return -EIO;
2424 } 2424 }
2425 2425
2426 /* determine the leaf number corresponding to the block and 2426 /* determine the leaf number corresponding to the block and
2427 * the index within the dmap control tree. 2427 * the index within the dmap control tree.
2428 */ 2428 */
2429 leafno = BLKTOCTLLEAF(blkno, dcp->budmin); 2429 leafno = BLKTOCTLLEAF(blkno, dcp->budmin);
2430 ti = leafno + le32_to_cpu(dcp->leafidx); 2430 ti = leafno + le32_to_cpu(dcp->leafidx);
2431 2431
2432 /* save the current leaf value and the current root level (i.e. 2432 /* save the current leaf value and the current root level (i.e.
2433 * maximum l2 free string described by this dmapctl). 2433 * maximum l2 free string described by this dmapctl).
2434 */ 2434 */
2435 oldval = dcp->stree[ti]; 2435 oldval = dcp->stree[ti];
2436 oldroot = dcp->stree[ROOT]; 2436 oldroot = dcp->stree[ROOT];
2437 2437
2438 /* check if this is a control page update for an allocation. 2438 /* check if this is a control page update for an allocation.
2439 * if so, update the leaf to reflect the new leaf value using 2439 * if so, update the leaf to reflect the new leaf value using
2440 * dbSplit(); otherwise (deallocation), use dbJoin() to udpate 2440 * dbSplit(); otherwise (deallocation), use dbJoin() to udpate
2441 * the leaf with the new value. in addition to updating the 2441 * the leaf with the new value. in addition to updating the
2442 * leaf, dbSplit() will also split the binary buddy system of 2442 * leaf, dbSplit() will also split the binary buddy system of
2443 * the leaves, if required, and bubble new values within the 2443 * the leaves, if required, and bubble new values within the
2444 * dmapctl tree, if required. similarly, dbJoin() will join 2444 * dmapctl tree, if required. similarly, dbJoin() will join
2445 * the binary buddy system of leaves and bubble new values up 2445 * the binary buddy system of leaves and bubble new values up
2446 * the dmapctl tree as required by the new leaf value. 2446 * the dmapctl tree as required by the new leaf value.
2447 */ 2447 */
2448 if (alloc) { 2448 if (alloc) {
2449 /* check if we are in the middle of a binary buddy 2449 /* check if we are in the middle of a binary buddy
2450 * system. this happens when we are performing the 2450 * system. this happens when we are performing the
2451 * first allocation out of an allocation group that 2451 * first allocation out of an allocation group that
2452 * is part (not the first part) of a larger binary 2452 * is part (not the first part) of a larger binary
2453 * buddy system. if we are in the middle, back split 2453 * buddy system. if we are in the middle, back split
2454 * the system prior to calling dbSplit() which assumes 2454 * the system prior to calling dbSplit() which assumes
2455 * that it is at the front of a binary buddy system. 2455 * that it is at the front of a binary buddy system.
2456 */ 2456 */
2457 if (oldval == NOFREE) { 2457 if (oldval == NOFREE) {
2458 rc = dbBackSplit((dmtree_t *) dcp, leafno); 2458 rc = dbBackSplit((dmtree_t *) dcp, leafno);
2459 if (rc) 2459 if (rc)
2460 return rc; 2460 return rc;
2461 oldval = dcp->stree[ti]; 2461 oldval = dcp->stree[ti];
2462 } 2462 }
2463 dbSplit((dmtree_t *) dcp, leafno, dcp->budmin, newval); 2463 dbSplit((dmtree_t *) dcp, leafno, dcp->budmin, newval);
2464 } else { 2464 } else {
2465 rc = dbJoin((dmtree_t *) dcp, leafno, newval); 2465 rc = dbJoin((dmtree_t *) dcp, leafno, newval);
2466 if (rc) 2466 if (rc)
2467 return rc; 2467 return rc;
2468 } 2468 }
2469 2469
2470 /* check if the root of the current dmap control page changed due 2470 /* check if the root of the current dmap control page changed due
2471 * to the update and if the current dmap control page is not at 2471 * to the update and if the current dmap control page is not at
2472 * the current top level (i.e. L0, L1, L2) of the map. if so (i.e. 2472 * the current top level (i.e. L0, L1, L2) of the map. if so (i.e.
2473 * root changed and this is not the top level), call this routine 2473 * root changed and this is not the top level), call this routine
2474 * again (recursion) for the next higher level of the mapping to 2474 * again (recursion) for the next higher level of the mapping to
2475 * reflect the change in root for the current dmap control page. 2475 * reflect the change in root for the current dmap control page.
2476 */ 2476 */
2477 if (dcp->stree[ROOT] != oldroot) { 2477 if (dcp->stree[ROOT] != oldroot) {
2478 /* are we below the top level of the map. if so, 2478 /* are we below the top level of the map. if so,
2479 * bubble the root up to the next higher level. 2479 * bubble the root up to the next higher level.
2480 */ 2480 */
2481 if (level < bmp->db_maxlevel) { 2481 if (level < bmp->db_maxlevel) {
2482 /* bubble up the new root of this dmap control page to 2482 /* bubble up the new root of this dmap control page to
2483 * the next level. 2483 * the next level.
2484 */ 2484 */
2485 if ((rc = 2485 if ((rc =
2486 dbAdjCtl(bmp, blkno, dcp->stree[ROOT], alloc, 2486 dbAdjCtl(bmp, blkno, dcp->stree[ROOT], alloc,
2487 level + 1))) { 2487 level + 1))) {
2488 /* something went wrong in bubbling up the new 2488 /* something went wrong in bubbling up the new
2489 * root value, so backout the changes to the 2489 * root value, so backout the changes to the
2490 * current dmap control page. 2490 * current dmap control page.
2491 */ 2491 */
2492 if (alloc) { 2492 if (alloc) {
2493 dbJoin((dmtree_t *) dcp, leafno, 2493 dbJoin((dmtree_t *) dcp, leafno,
2494 oldval); 2494 oldval);
2495 } else { 2495 } else {
2496 /* the dbJoin() above might have 2496 /* the dbJoin() above might have
2497 * caused a larger binary buddy system 2497 * caused a larger binary buddy system
2498 * to form and we may now be in the 2498 * to form and we may now be in the
2499 * middle of it. if this is the case, 2499 * middle of it. if this is the case,
2500 * back split the buddies. 2500 * back split the buddies.
2501 */ 2501 */
2502 if (dcp->stree[ti] == NOFREE) 2502 if (dcp->stree[ti] == NOFREE)
2503 dbBackSplit((dmtree_t *) 2503 dbBackSplit((dmtree_t *)
2504 dcp, leafno); 2504 dcp, leafno);
2505 dbSplit((dmtree_t *) dcp, leafno, 2505 dbSplit((dmtree_t *) dcp, leafno,
2506 dcp->budmin, oldval); 2506 dcp->budmin, oldval);
2507 } 2507 }
2508 2508
2509 /* release the buffer and return the error. 2509 /* release the buffer and return the error.
2510 */ 2510 */
2511 release_metapage(mp); 2511 release_metapage(mp);
2512 return (rc); 2512 return (rc);
2513 } 2513 }
2514 } else { 2514 } else {
2515 /* we're at the top level of the map. update 2515 /* we're at the top level of the map. update
2516 * the bmap control page to reflect the size 2516 * the bmap control page to reflect the size
2517 * of the maximum free buddy system. 2517 * of the maximum free buddy system.
2518 */ 2518 */
2519 assert(level == bmp->db_maxlevel); 2519 assert(level == bmp->db_maxlevel);
2520 if (bmp->db_maxfreebud != oldroot) { 2520 if (bmp->db_maxfreebud != oldroot) {
2521 jfs_error(bmp->db_ipbmap->i_sb, 2521 jfs_error(bmp->db_ipbmap->i_sb,
2522 "dbAdjCtl: the maximum free buddy is " 2522 "dbAdjCtl: the maximum free buddy is "
2523 "not the old root"); 2523 "not the old root");
2524 } 2524 }
2525 bmp->db_maxfreebud = dcp->stree[ROOT]; 2525 bmp->db_maxfreebud = dcp->stree[ROOT];
2526 } 2526 }
2527 } 2527 }
2528 2528
2529 /* write the buffer. 2529 /* write the buffer.
2530 */ 2530 */
2531 write_metapage(mp); 2531 write_metapage(mp);
2532 2532
2533 return (0); 2533 return (0);
2534 } 2534 }
2535 2535
2536 2536
2537 /* 2537 /*
2538 * NAME: dbSplit() 2538 * NAME: dbSplit()
2539 * 2539 *
2540 * FUNCTION: update the leaf of a dmtree with a new value, splitting 2540 * FUNCTION: update the leaf of a dmtree with a new value, splitting
2541 * the leaf from the binary buddy system of the dmtree's 2541 * the leaf from the binary buddy system of the dmtree's
2542 * leaves, as required. 2542 * leaves, as required.
2543 * 2543 *
2544 * PARAMETERS: 2544 * PARAMETERS:
2545 * tp - pointer to the tree containing the leaf. 2545 * tp - pointer to the tree containing the leaf.
2546 * leafno - the number of the leaf to be updated. 2546 * leafno - the number of the leaf to be updated.
2547 * splitsz - the size the binary buddy system starting at the leaf 2547 * splitsz - the size the binary buddy system starting at the leaf
2548 * must be split to, specified as the log2 number of blocks. 2548 * must be split to, specified as the log2 number of blocks.
2549 * newval - the new value for the leaf. 2549 * newval - the new value for the leaf.
2550 * 2550 *
2551 * RETURN VALUES: none 2551 * RETURN VALUES: none
2552 * 2552 *
2553 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; 2553 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
2554 */ 2554 */
2555 static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval) 2555 static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval)
2556 { 2556 {
2557 int budsz; 2557 int budsz;
2558 int cursz; 2558 int cursz;
2559 s8 *leaf = tp->dmt_stree + le32_to_cpu(tp->dmt_leafidx); 2559 s8 *leaf = tp->dmt_stree + le32_to_cpu(tp->dmt_leafidx);
2560 2560
2561 /* check if the leaf needs to be split. 2561 /* check if the leaf needs to be split.
2562 */ 2562 */
2563 if (leaf[leafno] > tp->dmt_budmin) { 2563 if (leaf[leafno] > tp->dmt_budmin) {
2564 /* the split occurs by cutting the buddy system in half 2564 /* the split occurs by cutting the buddy system in half
2565 * at the specified leaf until we reach the specified 2565 * at the specified leaf until we reach the specified
2566 * size. pick up the starting split size (current size 2566 * size. pick up the starting split size (current size
2567 * - 1 in l2) and the corresponding buddy size. 2567 * - 1 in l2) and the corresponding buddy size.
2568 */ 2568 */
2569 cursz = leaf[leafno] - 1; 2569 cursz = leaf[leafno] - 1;
2570 budsz = BUDSIZE(cursz, tp->dmt_budmin); 2570 budsz = BUDSIZE(cursz, tp->dmt_budmin);
2571 2571
2572 /* split until we reach the specified size. 2572 /* split until we reach the specified size.
2573 */ 2573 */
2574 while (cursz >= splitsz) { 2574 while (cursz >= splitsz) {
2575 /* update the buddy's leaf with its new value. 2575 /* update the buddy's leaf with its new value.
2576 */ 2576 */
2577 dbAdjTree(tp, leafno ^ budsz, cursz); 2577 dbAdjTree(tp, leafno ^ budsz, cursz);
2578 2578
2579 /* on to the next size and buddy. 2579 /* on to the next size and buddy.
2580 */ 2580 */
2581 cursz -= 1; 2581 cursz -= 1;
2582 budsz >>= 1; 2582 budsz >>= 1;
2583 } 2583 }
2584 } 2584 }
2585 2585
2586 /* adjust the dmap tree to reflect the specified leaf's new 2586 /* adjust the dmap tree to reflect the specified leaf's new
2587 * value. 2587 * value.
2588 */ 2588 */
2589 dbAdjTree(tp, leafno, newval); 2589 dbAdjTree(tp, leafno, newval);
2590 } 2590 }
2591 2591
2592 2592
2593 /* 2593 /*
2594 * NAME: dbBackSplit() 2594 * NAME: dbBackSplit()
2595 * 2595 *
2596 * FUNCTION: back split the binary buddy system of dmtree leaves 2596 * FUNCTION: back split the binary buddy system of dmtree leaves
2597 * that hold a specified leaf until the specified leaf 2597 * that hold a specified leaf until the specified leaf
2598 * starts its own binary buddy system. 2598 * starts its own binary buddy system.
2599 * 2599 *
2600 * the allocators typically perform allocations at the start 2600 * the allocators typically perform allocations at the start
2601 * of binary buddy systems and dbSplit() is used to accomplish 2601 * of binary buddy systems and dbSplit() is used to accomplish
2602 * any required splits. in some cases, however, allocation 2602 * any required splits. in some cases, however, allocation
2603 * may occur in the middle of a binary system and requires a 2603 * may occur in the middle of a binary system and requires a
2604 * back split, with the split proceeding out from the middle of 2604 * back split, with the split proceeding out from the middle of
2605 * the system (less efficient) rather than the start of the 2605 * the system (less efficient) rather than the start of the
2606 * system (more efficient). the cases in which a back split 2606 * system (more efficient). the cases in which a back split
2607 * is required are rare and are limited to the first allocation 2607 * is required are rare and are limited to the first allocation
2608 * within an allocation group which is a part (not first part) 2608 * within an allocation group which is a part (not first part)
2609 * of a larger binary buddy system and a few exception cases 2609 * of a larger binary buddy system and a few exception cases
2610 * in which a previous join operation must be backed out. 2610 * in which a previous join operation must be backed out.
2611 * 2611 *
2612 * PARAMETERS: 2612 * PARAMETERS:
2613 * tp - pointer to the tree containing the leaf. 2613 * tp - pointer to the tree containing the leaf.
2614 * leafno - the number of the leaf to be updated. 2614 * leafno - the number of the leaf to be updated.
2615 * 2615 *
2616 * RETURN VALUES: none 2616 * RETURN VALUES: none
2617 * 2617 *
2618 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; 2618 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
2619 */ 2619 */
/* Back split the buddy system containing 'leafno' until leafno itself
 * heads a buddy system.  Returns 0 on success or -EIO if the on-disk
 * map is inconsistent (buddy search runs off the tree, or the final
 * leaf value disagrees with the computed size).
 */
static int dbBackSplit(dmtree_t * tp, int leafno)
{
	int budsz, bud, w, bsz, size;
	int cursz;
	/* leaves live at a fixed offset within the serialized tree */
	s8 *leaf = tp->dmt_stree + le32_to_cpu(tp->dmt_leafidx);

	/* leaf should be part (not first part) of a binary
	 * buddy system.
	 */
	assert(leaf[leafno] == NOFREE);

	/* the back split is accomplished by iteratively finding the leaf
	 * that starts the buddy system that contains the specified leaf and
	 * splitting that system in two.  this iteration continues until
	 * the specified leaf becomes the start of a buddy system.
	 *
	 * determine maximum possible l2 size for the specified leaf.
	 */
	size =
	    LITOL2BSZ(leafno, le32_to_cpu(tp->dmt_l2nleafs),
		      tp->dmt_budmin);

	/* determine the number of leaves covered by this size.  this
	 * is the buddy size that we will start with as we search for
	 * the buddy system that contains the specified leaf.
	 */
	budsz = BUDSIZE(size, tp->dmt_budmin);

	/* back split.
	 */
	while (leaf[leafno] == NOFREE) {
		/* find the leftmost buddy leaf.
		 *
		 * NOTE: the update clause reads 'bud', which is assigned
		 * inside the body; it only runs after the first pass, so
		 * 'bud' is always set before use.
		 */
		for (w = leafno, bsz = budsz;; bsz <<= 1,
		     w = (w < bud) ? w : bud) {
			/* searching past the leaf count means the map
			 * is corrupt — bail out rather than loop forever.
			 */
			if (bsz >= le32_to_cpu(tp->dmt_nleafs)) {
				jfs_err("JFS: block map error in dbBackSplit");
				return -EIO;
			}

			/* determine the buddy.
			 */
			bud = w ^ bsz;

			/* check if this buddy is the start of the system.
			 */
			if (leaf[bud] != NOFREE) {
				/* split the leaf at the start of the
				 * system in two.
				 */
				cursz = leaf[bud] - 1;
				dbSplit(tp, bud, cursz, cursz);
				break;
			}
		}
	}

	/* after splitting, leafno must head a system of exactly the
	 * size computed above; anything else indicates corruption.
	 */
	if (leaf[leafno] != size) {
		jfs_err("JFS: wrong leaf value in dbBackSplit");
		return -EIO;
	}
	return 0;
}
2683 2683
2684 2684
2685 /* 2685 /*
2686 * NAME: dbJoin() 2686 * NAME: dbJoin()
2687 * 2687 *
2688 * FUNCTION: update the leaf of a dmtree with a new value, joining 2688 * FUNCTION: update the leaf of a dmtree with a new value, joining
2689 * the leaf with other leaves of the dmtree into a multi-leaf 2689 * the leaf with other leaves of the dmtree into a multi-leaf
2690 * binary buddy system, as required. 2690 * binary buddy system, as required.
2691 * 2691 *
2692 * PARAMETERS: 2692 * PARAMETERS:
2693 * tp - pointer to the tree containing the leaf. 2693 * tp - pointer to the tree containing the leaf.
2694 * leafno - the number of the leaf to be updated. 2694 * leafno - the number of the leaf to be updated.
2695 * newval - the new value for the leaf. 2695 * newval - the new value for the leaf.
2696 * 2696 *
2697 * RETURN VALUES: none 2697 * RETURN VALUES: none
2698 */ 2698 */
2699 static int dbJoin(dmtree_t * tp, int leafno, int newval) 2699 static int dbJoin(dmtree_t * tp, int leafno, int newval)
2700 { 2700 {
2701 int budsz, buddy; 2701 int budsz, buddy;
2702 s8 *leaf; 2702 s8 *leaf;
2703 2703
2704 /* can the new leaf value require a join with other leaves ? 2704 /* can the new leaf value require a join with other leaves ?
2705 */ 2705 */
2706 if (newval >= tp->dmt_budmin) { 2706 if (newval >= tp->dmt_budmin) {
2707 /* pickup a pointer to the leaves of the tree. 2707 /* pickup a pointer to the leaves of the tree.
2708 */ 2708 */
2709 leaf = tp->dmt_stree + le32_to_cpu(tp->dmt_leafidx); 2709 leaf = tp->dmt_stree + le32_to_cpu(tp->dmt_leafidx);
2710 2710
2711 /* try to join the specified leaf into a large binary 2711 /* try to join the specified leaf into a large binary
2712 * buddy system. the join proceeds by attempting to join 2712 * buddy system. the join proceeds by attempting to join
2713 * the specified leafno with its buddy (leaf) at new value. 2713 * the specified leafno with its buddy (leaf) at new value.
2714 * if the join occurs, we attempt to join the left leaf 2714 * if the join occurs, we attempt to join the left leaf
2715 * of the joined buddies with its buddy at new value + 1. 2715 * of the joined buddies with its buddy at new value + 1.
2716 * we continue to join until we find a buddy that cannot be 2716 * we continue to join until we find a buddy that cannot be
2717 * joined (does not have a value equal to the size of the 2717 * joined (does not have a value equal to the size of the
2718 * last join) or until all leaves have been joined into a 2718 * last join) or until all leaves have been joined into a
2719 * single system. 2719 * single system.
2720 * 2720 *
2721 * get the buddy size (number of words covered) of 2721 * get the buddy size (number of words covered) of
2722 * the new value. 2722 * the new value.
2723 */ 2723 */
2724 budsz = BUDSIZE(newval, tp->dmt_budmin); 2724 budsz = BUDSIZE(newval, tp->dmt_budmin);
2725 2725
2726 /* try to join. 2726 /* try to join.
2727 */ 2727 */
2728 while (budsz < le32_to_cpu(tp->dmt_nleafs)) { 2728 while (budsz < le32_to_cpu(tp->dmt_nleafs)) {
2729 /* get the buddy leaf. 2729 /* get the buddy leaf.
2730 */ 2730 */
2731 buddy = leafno ^ budsz; 2731 buddy = leafno ^ budsz;
2732 2732
2733 /* if the leaf's new value is greater than its 2733 /* if the leaf's new value is greater than its
2734 * buddy's value, we join no more. 2734 * buddy's value, we join no more.
2735 */ 2735 */
2736 if (newval > leaf[buddy]) 2736 if (newval > leaf[buddy])
2737 break; 2737 break;
2738 2738
2739 /* It shouldn't be less */ 2739 /* It shouldn't be less */
2740 if (newval < leaf[buddy]) 2740 if (newval < leaf[buddy])
2741 return -EIO; 2741 return -EIO;
2742 2742
2743 /* check which (leafno or buddy) is the left buddy. 2743 /* check which (leafno or buddy) is the left buddy.
2744 * the left buddy gets to claim the blocks resulting 2744 * the left buddy gets to claim the blocks resulting
2745 * from the join while the right gets to claim none. 2745 * from the join while the right gets to claim none.
2746 * the left buddy is also eligable to participate in 2746 * the left buddy is also eligable to participate in
2747 * a join at the next higher level while the right 2747 * a join at the next higher level while the right
2748 * is not. 2748 * is not.
2749 * 2749 *
2750 */ 2750 */
2751 if (leafno < buddy) { 2751 if (leafno < buddy) {
2752 /* leafno is the left buddy. 2752 /* leafno is the left buddy.
2753 */ 2753 */
2754 dbAdjTree(tp, buddy, NOFREE); 2754 dbAdjTree(tp, buddy, NOFREE);
2755 } else { 2755 } else {
2756 /* buddy is the left buddy and becomes 2756 /* buddy is the left buddy and becomes
2757 * leafno. 2757 * leafno.
2758 */ 2758 */
2759 dbAdjTree(tp, leafno, NOFREE); 2759 dbAdjTree(tp, leafno, NOFREE);
2760 leafno = buddy; 2760 leafno = buddy;
2761 } 2761 }
2762 2762
2763 /* on to try the next join. 2763 /* on to try the next join.
2764 */ 2764 */
2765 newval += 1; 2765 newval += 1;
2766 budsz <<= 1; 2766 budsz <<= 1;
2767 } 2767 }
2768 } 2768 }
2769 2769
2770 /* update the leaf value. 2770 /* update the leaf value.
2771 */ 2771 */
2772 dbAdjTree(tp, leafno, newval); 2772 dbAdjTree(tp, leafno, newval);
2773 2773
2774 return 0; 2774 return 0;
2775 } 2775 }
2776 2776
2777 2777
2778 /* 2778 /*
2779 * NAME: dbAdjTree() 2779 * NAME: dbAdjTree()
2780 * 2780 *
2781 * FUNCTION: update a leaf of a dmtree with a new value, adjusting 2781 * FUNCTION: update a leaf of a dmtree with a new value, adjusting
2782 * the dmtree, as required, to reflect the new leaf value. 2782 * the dmtree, as required, to reflect the new leaf value.
2783 * the combination of any buddies must already be done before 2783 * the combination of any buddies must already be done before
2784 * this is called. 2784 * this is called.
2785 * 2785 *
2786 * PARAMETERS: 2786 * PARAMETERS:
2787 * tp - pointer to the tree to be adjusted. 2787 * tp - pointer to the tree to be adjusted.
2788 * leafno - the number of the leaf to be updated. 2788 * leafno - the number of the leaf to be updated.
2789 * newval - the new value for the leaf. 2789 * newval - the new value for the leaf.
2790 * 2790 *
2791 * RETURN VALUES: none 2791 * RETURN VALUES: none
2792 */ 2792 */
2793 static void dbAdjTree(dmtree_t * tp, int leafno, int newval) 2793 static void dbAdjTree(dmtree_t * tp, int leafno, int newval)
2794 { 2794 {
2795 int lp, pp, k; 2795 int lp, pp, k;
2796 int max; 2796 int max;
2797 2797
2798 /* pick up the index of the leaf for this leafno. 2798 /* pick up the index of the leaf for this leafno.
2799 */ 2799 */
2800 lp = leafno + le32_to_cpu(tp->dmt_leafidx); 2800 lp = leafno + le32_to_cpu(tp->dmt_leafidx);
2801 2801
2802 /* is the current value the same as the old value ? if so, 2802 /* is the current value the same as the old value ? if so,
2803 * there is nothing to do. 2803 * there is nothing to do.
2804 */ 2804 */
2805 if (tp->dmt_stree[lp] == newval) 2805 if (tp->dmt_stree[lp] == newval)
2806 return; 2806 return;
2807 2807
2808 /* set the new value. 2808 /* set the new value.
2809 */ 2809 */
2810 tp->dmt_stree[lp] = newval; 2810 tp->dmt_stree[lp] = newval;
2811 2811
2812 /* bubble the new value up the tree as required. 2812 /* bubble the new value up the tree as required.
2813 */ 2813 */
2814 for (k = 0; k < le32_to_cpu(tp->dmt_height); k++) { 2814 for (k = 0; k < le32_to_cpu(tp->dmt_height); k++) {
2815 /* get the index of the first leaf of the 4 leaf 2815 /* get the index of the first leaf of the 4 leaf
2816 * group containing the specified leaf (leafno). 2816 * group containing the specified leaf (leafno).
2817 */ 2817 */
2818 lp = ((lp - 1) & ~0x03) + 1; 2818 lp = ((lp - 1) & ~0x03) + 1;
2819 2819
2820 /* get the index of the parent of this 4 leaf group. 2820 /* get the index of the parent of this 4 leaf group.
2821 */ 2821 */
2822 pp = (lp - 1) >> 2; 2822 pp = (lp - 1) >> 2;
2823 2823
2824 /* determine the maximum of the 4 leaves. 2824 /* determine the maximum of the 4 leaves.
2825 */ 2825 */
2826 max = TREEMAX(&tp->dmt_stree[lp]); 2826 max = TREEMAX(&tp->dmt_stree[lp]);
2827 2827
2828 /* if the maximum of the 4 is the same as the 2828 /* if the maximum of the 4 is the same as the
2829 * parent's value, we're done. 2829 * parent's value, we're done.
2830 */ 2830 */
2831 if (tp->dmt_stree[pp] == max) 2831 if (tp->dmt_stree[pp] == max)
2832 break; 2832 break;
2833 2833
2834 /* parent gets new value. 2834 /* parent gets new value.
2835 */ 2835 */
2836 tp->dmt_stree[pp] = max; 2836 tp->dmt_stree[pp] = max;
2837 2837
2838 /* parent becomes leaf for next go-round. 2838 /* parent becomes leaf for next go-round.
2839 */ 2839 */
2840 lp = pp; 2840 lp = pp;
2841 } 2841 }
2842 } 2842 }
2843 2843
2844 2844
2845 /* 2845 /*
2846 * NAME: dbFindLeaf() 2846 * NAME: dbFindLeaf()
2847 * 2847 *
2848 * FUNCTION: search a dmtree_t for sufficient free blocks, returning 2848 * FUNCTION: search a dmtree_t for sufficient free blocks, returning
2849 * the index of a leaf describing the free blocks if 2849 * the index of a leaf describing the free blocks if
2850 * sufficient free blocks are found. 2850 * sufficient free blocks are found.
2851 * 2851 *
2852 * the search starts at the top of the dmtree_t tree and 2852 * the search starts at the top of the dmtree_t tree and
2853 * proceeds down the tree to the leftmost leaf with sufficient 2853 * proceeds down the tree to the leftmost leaf with sufficient
2854 * free space. 2854 * free space.
2855 * 2855 *
2856 * PARAMETERS: 2856 * PARAMETERS:
2857 * tp - pointer to the tree to be searched. 2857 * tp - pointer to the tree to be searched.
2858 * l2nb - log2 number of free blocks to search for. 2858 * l2nb - log2 number of free blocks to search for.
2859 * leafidx - return pointer to be set to the index of the leaf 2859 * leafidx - return pointer to be set to the index of the leaf
2860 * describing at least l2nb free blocks if sufficient 2860 * describing at least l2nb free blocks if sufficient
2861 * free blocks are found. 2861 * free blocks are found.
2862 * 2862 *
2863 * RETURN VALUES: 2863 * RETURN VALUES:
2864 * 0 - success 2864 * 0 - success
2865 * -ENOSPC - insufficient free blocks. 2865 * -ENOSPC - insufficient free blocks.
2866 */ 2866 */
2867 static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx) 2867 static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx)
2868 { 2868 {
2869 int ti, n = 0, k, x = 0; 2869 int ti, n = 0, k, x = 0;
2870 2870
2871 /* first check the root of the tree to see if there is 2871 /* first check the root of the tree to see if there is
2872 * sufficient free space. 2872 * sufficient free space.
2873 */ 2873 */
2874 if (l2nb > tp->dmt_stree[ROOT]) 2874 if (l2nb > tp->dmt_stree[ROOT])
2875 return -ENOSPC; 2875 return -ENOSPC;
2876 2876
2877 /* sufficient free space available. now search down the tree 2877 /* sufficient free space available. now search down the tree
2878 * starting at the next level for the leftmost leaf that 2878 * starting at the next level for the leftmost leaf that
2879 * describes sufficient free space. 2879 * describes sufficient free space.
2880 */ 2880 */
2881 for (k = le32_to_cpu(tp->dmt_height), ti = 1; 2881 for (k = le32_to_cpu(tp->dmt_height), ti = 1;
2882 k > 0; k--, ti = ((ti + n) << 2) + 1) { 2882 k > 0; k--, ti = ((ti + n) << 2) + 1) {
2883 /* search the four nodes at this level, starting from 2883 /* search the four nodes at this level, starting from
2884 * the left. 2884 * the left.
2885 */ 2885 */
2886 for (x = ti, n = 0; n < 4; n++) { 2886 for (x = ti, n = 0; n < 4; n++) {
2887 /* sufficient free space found. move to the next 2887 /* sufficient free space found. move to the next
2888 * level (or quit if this is the last level). 2888 * level (or quit if this is the last level).
2889 */ 2889 */
2890 if (l2nb <= tp->dmt_stree[x + n]) 2890 if (l2nb <= tp->dmt_stree[x + n])
2891 break; 2891 break;
2892 } 2892 }
2893 2893
2894 /* better have found something since the higher 2894 /* better have found something since the higher
2895 * levels of the tree said it was here. 2895 * levels of the tree said it was here.
2896 */ 2896 */
2897 assert(n < 4); 2897 assert(n < 4);
2898 } 2898 }
2899 2899
2900 /* set the return to the leftmost leaf describing sufficient 2900 /* set the return to the leftmost leaf describing sufficient
2901 * free space. 2901 * free space.
2902 */ 2902 */
2903 *leafidx = x + n - le32_to_cpu(tp->dmt_leafidx); 2903 *leafidx = x + n - le32_to_cpu(tp->dmt_leafidx);
2904 2904
2905 return (0); 2905 return (0);
2906 } 2906 }
2907 2907
2908 2908
2909 /* 2909 /*
2910 * NAME: dbFindBits() 2910 * NAME: dbFindBits()
2911 * 2911 *
2912 * FUNCTION: find a specified number of binary buddy free bits within a 2912 * FUNCTION: find a specified number of binary buddy free bits within a
2913 * dmap bitmap word value. 2913 * dmap bitmap word value.
2914 * 2914 *
2915 * this routine searches the bitmap value for (1 << l2nb) free 2915 * this routine searches the bitmap value for (1 << l2nb) free
2916 * bits at (1 << l2nb) alignments within the value. 2916 * bits at (1 << l2nb) alignments within the value.
2917 * 2917 *
2918 * PARAMETERS: 2918 * PARAMETERS:
2919 * word - dmap bitmap word value. 2919 * word - dmap bitmap word value.
2920 * l2nb - number of free bits specified as a log2 number. 2920 * l2nb - number of free bits specified as a log2 number.
2921 * 2921 *
2922 * RETURN VALUES: 2922 * RETURN VALUES:
2923 * starting bit number of free bits. 2923 * starting bit number of free bits.
2924 */ 2924 */
2925 static int dbFindBits(u32 word, int l2nb) 2925 static int dbFindBits(u32 word, int l2nb)
2926 { 2926 {
2927 int bitno, nb; 2927 int bitno, nb;
2928 u32 mask; 2928 u32 mask;
2929 2929
2930 /* get the number of bits. 2930 /* get the number of bits.
2931 */ 2931 */
2932 nb = 1 << l2nb; 2932 nb = 1 << l2nb;
2933 assert(nb <= DBWORD); 2933 assert(nb <= DBWORD);
2934 2934
2935 /* complement the word so we can use a mask (i.e. 0s represent 2935 /* complement the word so we can use a mask (i.e. 0s represent
2936 * free bits) and compute the mask. 2936 * free bits) and compute the mask.
2937 */ 2937 */
2938 word = ~word; 2938 word = ~word;
2939 mask = ONES << (DBWORD - nb); 2939 mask = ONES << (DBWORD - nb);
2940 2940
2941 /* scan the word for nb free bits at nb alignments. 2941 /* scan the word for nb free bits at nb alignments.
2942 */ 2942 */
2943 for (bitno = 0; mask != 0; bitno += nb, mask >>= nb) { 2943 for (bitno = 0; mask != 0; bitno += nb, mask >>= nb) {
2944 if ((mask & word) == mask) 2944 if ((mask & word) == mask)
2945 break; 2945 break;
2946 } 2946 }
2947 2947
2948 ASSERT(bitno < 32); 2948 ASSERT(bitno < 32);
2949 2949
2950 /* return the bit number. 2950 /* return the bit number.
2951 */ 2951 */
2952 return (bitno); 2952 return (bitno);
2953 } 2953 }
2954 2954
2955 2955
2956 /* 2956 /*
2957 * NAME: dbMaxBud(u8 *cp) 2957 * NAME: dbMaxBud(u8 *cp)
2958 * 2958 *
2959 * FUNCTION: determine the largest binary buddy string of free 2959 * FUNCTION: determine the largest binary buddy string of free
2960 * bits within 32-bits of the map. 2960 * bits within 32-bits of the map.
2961 * 2961 *
2962 * PARAMETERS: 2962 * PARAMETERS:
2963 * cp - pointer to the 32-bit value. 2963 * cp - pointer to the 32-bit value.
2964 * 2964 *
2965 * RETURN VALUES: 2965 * RETURN VALUES:
2966 * largest binary buddy of free bits within a dmap word. 2966 * largest binary buddy of free bits within a dmap word.
2967 */ 2967 */
2968 static int dbMaxBud(u8 * cp) 2968 static int dbMaxBud(u8 * cp)
2969 { 2969 {
2970 signed char tmp1, tmp2; 2970 signed char tmp1, tmp2;
2971 2971
2972 /* check if the wmap word is all free. if so, the 2972 /* check if the wmap word is all free. if so, the
2973 * free buddy size is BUDMIN. 2973 * free buddy size is BUDMIN.
2974 */ 2974 */
2975 if (*((uint *) cp) == 0) 2975 if (*((uint *) cp) == 0)
2976 return (BUDMIN); 2976 return (BUDMIN);
2977 2977
2978 /* check if the wmap word is half free. if so, the 2978 /* check if the wmap word is half free. if so, the
2979 * free buddy size is BUDMIN-1. 2979 * free buddy size is BUDMIN-1.
2980 */ 2980 */
2981 if (*((u16 *) cp) == 0 || *((u16 *) cp + 1) == 0) 2981 if (*((u16 *) cp) == 0 || *((u16 *) cp + 1) == 0)
2982 return (BUDMIN - 1); 2982 return (BUDMIN - 1);
2983 2983
2984 /* not all free or half free. determine the free buddy 2984 /* not all free or half free. determine the free buddy
2985 * size thru table lookup using quarters of the wmap word. 2985 * size thru table lookup using quarters of the wmap word.
2986 */ 2986 */
2987 tmp1 = max(budtab[cp[2]], budtab[cp[3]]); 2987 tmp1 = max(budtab[cp[2]], budtab[cp[3]]);
2988 tmp2 = max(budtab[cp[0]], budtab[cp[1]]); 2988 tmp2 = max(budtab[cp[0]], budtab[cp[1]]);
2989 return (max(tmp1, tmp2)); 2989 return (max(tmp1, tmp2));
2990 } 2990 }
2991 2991
2992 2992
2993 /* 2993 /*
2994 * NAME: cnttz(uint word) 2994 * NAME: cnttz(uint word)
2995 * 2995 *
2996 * FUNCTION: determine the number of trailing zeros within a 32-bit 2996 * FUNCTION: determine the number of trailing zeros within a 32-bit
2997 * value. 2997 * value.
2998 * 2998 *
2999 * PARAMETERS: 2999 * PARAMETERS:
3000 * value - 32-bit value to be examined. 3000 * value - 32-bit value to be examined.
3001 * 3001 *
3002 * RETURN VALUES: 3002 * RETURN VALUES:
3003 * count of trailing zeros 3003 * count of trailing zeros
3004 */ 3004 */
3005 static int cnttz(u32 word) 3005 static int cnttz(u32 word)
3006 { 3006 {
3007 int n; 3007 int n;
3008 3008
3009 for (n = 0; n < 32; n++, word >>= 1) { 3009 for (n = 0; n < 32; n++, word >>= 1) {
3010 if (word & 0x01) 3010 if (word & 0x01)
3011 break; 3011 break;
3012 } 3012 }
3013 3013
3014 return (n); 3014 return (n);
3015 } 3015 }
3016 3016
3017 3017
3018 /* 3018 /*
3019 * NAME: cntlz(u32 value) 3019 * NAME: cntlz(u32 value)
3020 * 3020 *
3021 * FUNCTION: determine the number of leading zeros within a 32-bit 3021 * FUNCTION: determine the number of leading zeros within a 32-bit
3022 * value. 3022 * value.
3023 * 3023 *
3024 * PARAMETERS: 3024 * PARAMETERS:
3025 * value - 32-bit value to be examined. 3025 * value - 32-bit value to be examined.
3026 * 3026 *
3027 * RETURN VALUES: 3027 * RETURN VALUES:
3028 * count of leading zeros 3028 * count of leading zeros
3029 */ 3029 */
3030 static int cntlz(u32 value) 3030 static int cntlz(u32 value)
3031 { 3031 {
3032 int n; 3032 int n;
3033 3033
3034 for (n = 0; n < 32; n++, value <<= 1) { 3034 for (n = 0; n < 32; n++, value <<= 1) {
3035 if (value & HIGHORDER) 3035 if (value & HIGHORDER)
3036 break; 3036 break;
3037 } 3037 }
3038 return (n); 3038 return (n);
3039 } 3039 }
3040 3040
3041 3041
3042 /* 3042 /*
3043 * NAME: blkstol2(s64 nb) 3043 * NAME: blkstol2(s64 nb)
3044 * 3044 *
3045 * FUNCTION: convert a block count to its log2 value. if the block 3045 * FUNCTION: convert a block count to its log2 value. if the block
3046 * count is not a l2 multiple, it is rounded up to the next 3046 * count is not a l2 multiple, it is rounded up to the next
3047 * larger l2 multiple. 3047 * larger l2 multiple.
3048 * 3048 *
3049 * PARAMETERS: 3049 * PARAMETERS:
3050 * nb - number of blocks 3050 * nb - number of blocks
3051 * 3051 *
3052 * RETURN VALUES: 3052 * RETURN VALUES:
3053 * log2 number of blocks 3053 * log2 number of blocks
3054 */ 3054 */
3055 static int blkstol2(s64 nb) 3055 static int blkstol2(s64 nb)
3056 { 3056 {
3057 int l2nb; 3057 int l2nb;
3058 s64 mask; /* meant to be signed */ 3058 s64 mask; /* meant to be signed */
3059 3059
3060 mask = (s64) 1 << (64 - 1); 3060 mask = (s64) 1 << (64 - 1);
3061 3061
3062 /* count the leading bits. 3062 /* count the leading bits.
3063 */ 3063 */
3064 for (l2nb = 0; l2nb < 64; l2nb++, mask >>= 1) { 3064 for (l2nb = 0; l2nb < 64; l2nb++, mask >>= 1) {
3065 /* leading bit found. 3065 /* leading bit found.
3066 */ 3066 */
3067 if (nb & mask) { 3067 if (nb & mask) {
3068 /* determine the l2 value. 3068 /* determine the l2 value.
3069 */ 3069 */
3070 l2nb = (64 - 1) - l2nb; 3070 l2nb = (64 - 1) - l2nb;
3071 3071
3072 /* check if we need to round up. 3072 /* check if we need to round up.
3073 */ 3073 */
3074 if (~mask & nb) 3074 if (~mask & nb)
3075 l2nb++; 3075 l2nb++;
3076 3076
3077 return (l2nb); 3077 return (l2nb);
3078 } 3078 }
3079 } 3079 }
3080 assert(0); 3080 assert(0);
3081 return 0; /* fix compiler warning */ 3081 return 0; /* fix compiler warning */
3082 } 3082 }
3083 3083
3084 3084
3085 /* 3085 /*
3086 * NAME: dbAllocBottomUp() 3086 * NAME: dbAllocBottomUp()
3087 * 3087 *
3088 * FUNCTION: alloc the specified block range from the working block 3088 * FUNCTION: alloc the specified block range from the working block
3089 * allocation map. 3089 * allocation map.
3090 * 3090 *
3091 * the blocks will be alloc from the working map one dmap 3091 * the blocks will be alloc from the working map one dmap
3092 * at a time. 3092 * at a time.
3093 * 3093 *
3094 * PARAMETERS: 3094 * PARAMETERS:
3095 * ip - pointer to in-core inode; 3095 * ip - pointer to in-core inode;
3096 * blkno - starting block number to be freed. 3096 * blkno - starting block number to be freed.
3097 * nblocks - number of blocks to be freed. 3097 * nblocks - number of blocks to be freed.
3098 * 3098 *
3099 * RETURN VALUES: 3099 * RETURN VALUES:
3100 * 0 - success 3100 * 0 - success
3101 * -EIO - i/o error 3101 * -EIO - i/o error
3102 */ 3102 */
int dbAllocBottomUp(struct inode *ip, s64 blkno, s64 nblocks)
{
	struct metapage *mp;
	struct dmap *dp;
	int nb, rc;
	s64 lblkno, rem;
	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
	struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap;

	IREAD_LOCK(ipbmap, RDWRLOCK_DMAP);

	/* block to be allocated better be within the mapsize. */
	ASSERT(nblocks <= bmp->db_mapsize - blkno);

	/*
	 * allocate the blocks a dmap at a time.
	 */
	mp = NULL;
	for (rem = nblocks; rem > 0; rem -= nb, blkno += nb) {
		/* release previous dmap if any */
		if (mp) {
			write_metapage(mp);
		}

		/* get the buffer for the current dmap. */
		lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage);
		mp = read_metapage(ipbmap, lblkno, PSIZE, 0);
		if (mp == NULL) {
			IREAD_UNLOCK(ipbmap);
			return -EIO;
		}
		dp = (struct dmap *) mp->data;

		/* determine the number of blocks to be allocated from
		 * this dmap: the remainder, capped at the distance to the
		 * end of the current dmap.
		 */
		nb = min(rem, BPERDMAP - (blkno & (BPERDMAP - 1)));

		/* allocate the blocks; on failure release the current
		 * metapage and drop the map lock before returning.
		 */
		if ((rc = dbAllocDmapBU(bmp, dp, blkno, nb))) {
			release_metapage(mp);
			IREAD_UNLOCK(ipbmap);
			return (rc);
		}
	}

	/* write the last buffer. */
	write_metapage(mp);

	IREAD_UNLOCK(ipbmap);

	return (0);
}
3156 3156
3157 3157
/*
 * NAME:	dbAllocDmapBU()
 *
 * FUNCTION:	allocate a block range within a single dmap for
 *		dbAllocBottomUp(), updating the dmap's working bitmap,
 *		free count, and summary tree, and bubbling any root
 *		change up through the dmap control pages.
 *
 * PARAMETERS:
 *	bmp	- pointer to bmap descriptor
 *	dp	- pointer to dmap to allocate from
 *	blkno	- starting block number of the range
 *	nblocks	- number of blocks in the range
 *
 * RETURN VALUES:
 *	0	- success
 *	error code propagated from dbAdjCtl() otherwise
 */
static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno,
			 int nblocks)
{
	int rc;
	int dbitno, word, rembits, nb, nwords, wbitno, agno;
	s8 oldroot, *leaf;
	struct dmaptree *tp = (struct dmaptree *) & dp->tree;

	/* save the current value of the root (i.e. maximum free string)
	 * of the dmap tree.
	 */
	oldroot = tp->stree[ROOT];

	/* pick up a pointer to the leaves of the dmap tree */
	leaf = tp->stree + LEAFIND;

	/* determine the bit number and word within the dmap of the
	 * starting block.
	 */
	dbitno = blkno & (BPERDMAP - 1);
	word = dbitno >> L2DBWORD;

	/* block range better be within the dmap */
	assert(dbitno + nblocks <= BPERDMAP);

	/* allocate the bits of the dmap's words corresponding to the block
	 * range. not all bits of the first and last words may be contained
	 * within the block range.  if this is the case, we'll work against
	 * those words (i.e. partial first and/or last) on an individual basis
	 * (a single pass), allocating the bits of interest by hand and
	 * updating the leaf corresponding to the dmap word. a single pass
	 * will be used for all dmap words fully contained within the
	 * specified range.  within this pass, the bits of all fully contained
	 * dmap words will be marked as free in a single shot and the leaves
	 * will be updated. a single leaf may describe the free space of
	 * multiple dmap words, so we may update only a subset of the actual
	 * leaves corresponding to the dmap words of the block range.
	 */
	for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) {
		/* determine the bit number within the word and
		 * the number of bits within the word.
		 */
		wbitno = dbitno & (DBWORD - 1);
		nb = min(rembits, DBWORD - wbitno);

		/* check if only part of a word is to be allocated.
		 */
		if (nb < DBWORD) {
			/* allocate (set to 1) the appropriate bits within
			 * this dmap word.
			 */
			dp->wmap[word] |= cpu_to_le32(ONES << (DBWORD - nb)
						      >> wbitno);

			word++;
		} else {
			/* one or more dmap words are fully contained
			 * within the block range.  determine how many
			 * words and allocate (set to 1) the bits of these
			 * words.
			 */
			nwords = rembits >> L2DBWORD;
			memset(&dp->wmap[word], (int) ONES, nwords * 4);

			/* determine how many bits */
			nb = nwords << L2DBWORD;
			word += nwords;
		}
	}

	/* update the free count for this dmap */
	le32_add_cpu(&dp->nfree, -nblocks);

	/* reconstruct summary tree */
	dbInitDmapTree(dp);

	BMAP_LOCK(bmp);

	/* if this allocation group is completely free,
	 * update the highest active allocation group number
	 * if this allocation group is the new max.
	 */
	agno = blkno >> bmp->db_agl2size;
	if (agno > bmp->db_maxag)
		bmp->db_maxag = agno;

	/* update the free count for the allocation group and map */
	bmp->db_agfree[agno] -= nblocks;
	bmp->db_nfree -= nblocks;

	BMAP_UNLOCK(bmp);

	/* if the root has not changed, done. */
	if (tp->stree[ROOT] == oldroot)
		return (0);

	/* root changed. bubble the change up to the dmap control pages.
	 * if the adjustment of the upper level control pages fails,
	 * backout the bit allocation (thus making everything consistent).
	 */
	if ((rc = dbAdjCtl(bmp, blkno, tp->stree[ROOT], 1, 0)))
		dbFreeBits(bmp, dp, blkno, nblocks);

	return (rc);
}
3263 3263
3264 3264
3265 /* 3265 /*
3266 * NAME: dbExtendFS() 3266 * NAME: dbExtendFS()
3267 * 3267 *
3268 * FUNCTION: extend bmap from blkno for nblocks; 3268 * FUNCTION: extend bmap from blkno for nblocks;
3269 * dbExtendFS() updates bmap ready for dbAllocBottomUp(); 3269 * dbExtendFS() updates bmap ready for dbAllocBottomUp();
3270 * 3270 *
3271 * L2 3271 * L2
3272 * | 3272 * |
3273 * L1---------------------------------L1 3273 * L1---------------------------------L1
3274 * | | 3274 * | |
3275 * L0---------L0---------L0 L0---------L0---------L0 3275 * L0---------L0---------L0 L0---------L0---------L0
3276 * | | | | | | 3276 * | | | | | |
3277 * d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,.,dm; 3277 * d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,.,dm;
3278 * L2L1L0d0,...,dnL0d0,...,dnL0d0,...,dnL1L0d0,...,dnL0d0,...,dnL0d0,..dm 3278 * L2L1L0d0,...,dnL0d0,...,dnL0d0,...,dnL1L0d0,...,dnL0d0,...,dnL0d0,..dm
3279 * 3279 *
3280 * <---old---><----------------------------extend-----------------------> 3280 * <---old---><----------------------------extend----------------------->
3281 */ 3281 */
int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks)
{
	struct jfs_sb_info *sbi = JFS_SBI(ipbmap->i_sb);
	int nbperpage = sbi->nbperpage;
	int i, i0 = true, j, j0 = true, k, n;
	s64 newsize;
	s64 p;
	struct metapage *mp, *l2mp, *l1mp = NULL, *l0mp = NULL;
	struct dmapctl *l2dcp, *l1dcp, *l0dcp;
	struct dmap *dp;
	s8 *l0leaf, *l1leaf, *l2leaf;
	struct bmap *bmp = sbi->bmap;
	int agno, l2agsize, oldl2agsize;
	s64 ag_rem;

	newsize = blkno + nblocks;

	jfs_info("dbExtendFS: blkno:%Ld nblocks:%Ld newsize:%Ld",
		 (long long) blkno, (long long) nblocks, (long long) newsize);

	/*
	 * initialize bmap control page.
	 *
	 * all the data in bmap control page should exclude
	 * the mkfs hidden dmap page.
	 */

	/* update mapsize */
	bmp->db_mapsize = newsize;
	bmp->db_maxlevel = BMAPSZTOLEV(bmp->db_mapsize);

	/* compute new AG size */
	l2agsize = dbGetL2AGSize(newsize);
	oldl2agsize = bmp->db_agl2size;

	bmp->db_agl2size = l2agsize;
	bmp->db_agsize = 1 << l2agsize;

	/* compute new number of AG; round up if the new size is not a
	 * multiple of the AG size.
	 */
	agno = bmp->db_numag;
	bmp->db_numag = newsize >> l2agsize;
	bmp->db_numag += ((u32) newsize % (u32) bmp->db_agsize) ? 1 : 0;

	/*
	 * reconfigure db_agfree[]
	 * from old AG configuration to new AG configuration;
	 *
	 * coalesce contiguous k (newAGSize/oldAGSize) AGs;
	 * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn;
	 * note: new AG size = old AG size * (2**x).
	 */
	if (l2agsize == oldl2agsize)
		goto extend;
	k = 1 << (l2agsize - oldl2agsize);
	ag_rem = bmp->db_agfree[0];	/* save agfree[0] */
	for (i = 0, n = 0; i < agno; n++) {
		bmp->db_agfree[n] = 0;	/* init collection point */

		/* coalesce cotiguous k AGs; */
		for (j = 0; j < k && i < agno; j++, i++) {
			/* merge AGi to AGn */
			bmp->db_agfree[n] += bmp->db_agfree[i];
		}
	}
	bmp->db_agfree[0] += ag_rem;	/* restore agfree[0] */

	for (; n < MAXAG; n++)
		bmp->db_agfree[n] = 0;

	/*
	 * update highest active ag number
	 */

	bmp->db_maxag = bmp->db_maxag / k;

	/*
	 * extend bmap
	 *
	 * update bit maps and corresponding level control pages;
	 * global control page db_nfree, db_agfree[agno], db_maxfreebud;
	 */
      extend:
	/* get L2 page */
	p = BMAPBLKNO + nbperpage;	/* L2 page */
	l2mp = read_metapage(ipbmap, p, PSIZE, 0);
	if (!l2mp) {
		jfs_error(ipbmap->i_sb, "dbExtendFS: L2 page could not be read");
		return -EIO;
	}
	l2dcp = (struct dmapctl *) l2mp->data;

	/* compute start L1 */
	k = blkno >> L2MAXL1SIZE;
	l2leaf = l2dcp->stree + CTLLEAFIND + k;
	p = BLKTOL1(blkno, sbi->l2nbperpage);	/* L1 page */

	/*
	 * extend each L1 in L2
	 */
	for (; k < LPERCTL; k++, p += nbperpage) {
		/* get L1 page: the first L1 visited (j0) may already be
		 * partially populated and is read in; subsequent L1 pages
		 * are brand new and only assigned/initialized.
		 */
		if (j0) {
			/* read in L1 page: (blkno & (MAXL1SIZE - 1)) */
			l1mp = read_metapage(ipbmap, p, PSIZE, 0);
			if (l1mp == NULL)
				goto errout;
			l1dcp = (struct dmapctl *) l1mp->data;

			/* compute start L0 */
			j = (blkno & (MAXL1SIZE - 1)) >> L2MAXL0SIZE;
			l1leaf = l1dcp->stree + CTLLEAFIND + j;
			p = BLKTOL0(blkno, sbi->l2nbperpage);
			j0 = false;
		} else {
			/* assign/init L1 page */
			l1mp = get_metapage(ipbmap, p, PSIZE, 0);
			if (l1mp == NULL)
				goto errout;

			l1dcp = (struct dmapctl *) l1mp->data;

			/* compute start L0 */
			j = 0;
			l1leaf = l1dcp->stree + CTLLEAFIND;
			p += nbperpage;	/* 1st L0 of L1.k */
		}

		/*
		 * extend each L0 in L1
		 */
		for (; j < LPERCTL; j++) {
			/* get L0 page: same first-visited vs. new-page
			 * distinction as for L1 above (i0 flag).
			 */
			if (i0) {
				/* read in L0 page: (blkno & (MAXL0SIZE - 1)) */

				l0mp = read_metapage(ipbmap, p, PSIZE, 0);
				if (l0mp == NULL)
					goto errout;
				l0dcp = (struct dmapctl *) l0mp->data;

				/* compute start dmap */
				i = (blkno & (MAXL0SIZE - 1)) >>
				    L2BPERDMAP;
				l0leaf = l0dcp->stree + CTLLEAFIND + i;
				p = BLKTODMAP(blkno,
					      sbi->l2nbperpage);
				i0 = false;
			} else {
				/* assign/init L0 page */
				l0mp = get_metapage(ipbmap, p, PSIZE, 0);
				if (l0mp == NULL)
					goto errout;

				l0dcp = (struct dmapctl *) l0mp->data;

				/* compute start dmap */
				i = 0;
				l0leaf = l0dcp->stree + CTLLEAFIND;
				p += nbperpage;	/* 1st dmap of L0.j */
			}

			/*
			 * extend each dmap in L0
			 */
			for (; i < LPERCTL; i++) {
				/*
				 * reconstruct the dmap page, and
				 * initialize corresponding parent L0 leaf
				 */
				if ((n = blkno & (BPERDMAP - 1))) {
					/* read in dmap page: */
					mp = read_metapage(ipbmap, p,
							   PSIZE, 0);
					if (mp == NULL)
						goto errout;
					n = min(nblocks, (s64)BPERDMAP - n);
				} else {
					/* assign/init dmap page */
					mp = read_metapage(ipbmap, p,
							   PSIZE, 0);
					if (mp == NULL)
						goto errout;

					n = min(nblocks, (s64)BPERDMAP);
				}

				dp = (struct dmap *) mp->data;
				*l0leaf = dbInitDmap(dp, blkno, n);

				/* account the new free blocks in the map
				 * and owning allocation group totals.
				 */
				bmp->db_nfree += n;
				agno = le64_to_cpu(dp->start) >> l2agsize;
				bmp->db_agfree[agno] += n;

				write_metapage(mp);

				l0leaf++;
				p += nbperpage;

				blkno += n;
				nblocks -= n;
				if (nblocks == 0)
					break;
			}	/* for each dmap in a L0 */

			/*
			 * build current L0 page from its leaves, and
			 * initialize corresponding parent L1 leaf
			 */
			*l1leaf = dbInitDmapCtl(l0dcp, 0, ++i);
			write_metapage(l0mp);
			l0mp = NULL;

			if (nblocks)
				l1leaf++;	/* continue for next L0 */
			else {
				/* more than 1 L0 ? */
				if (j > 0)
					break;	/* build L1 page */
				else {
					/* summarize in global bmap page */
					bmp->db_maxfreebud = *l1leaf;
					release_metapage(l1mp);
					release_metapage(l2mp);
					goto finalize;
				}
			}
		}	/* for each L0 in a L1 */

		/*
		 * build current L1 page from its leaves, and
		 * initialize corresponding parent L2 leaf
		 */
		*l2leaf = dbInitDmapCtl(l1dcp, 1, ++j);
		write_metapage(l1mp);
		l1mp = NULL;

		if (nblocks)
			l2leaf++;	/* continue for next L1 */
		else {
			/* more than 1 L1 ? */
			if (k > 0)
				break;	/* build L2 page */
			else {
				/* summarize in global bmap page */
				bmp->db_maxfreebud = *l2leaf;
				release_metapage(l2mp);
				goto finalize;
			}
		}
	}	/* for each L1 in a L2 */

	/* falling out of the L1 loop without consuming all blocks is a
	 * corruption/logic error; report it and clean up below.
	 */
	jfs_error(ipbmap->i_sb,
		  "dbExtendFS: function has not returned as expected");
      errout:
	if (l0mp)
		release_metapage(l0mp);
	if (l1mp)
		release_metapage(l1mp);
	release_metapage(l2mp);
	return -EIO;

	/*
	 * finalize bmap control page
	 */
      finalize:

	return 0;
}
3550 3550
3551 3551
3552 /* 3552 /*
3553 * dbFinalizeBmap() 3553 * dbFinalizeBmap()
3554 */ 3554 */
void dbFinalizeBmap(struct inode *ipbmap)
{
	struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap;
	int actags, inactags, l2nl;
	s64 ag_rem, actfree, inactfree, avgfree;
	int i, n;

	/*
	 * finalize bmap control page
	 */
//finalize:
	/*
	 * compute db_agpref: preferred ag to allocate from
	 * (the leftmost ag with average free space in it);
	 */
//agpref:
	/* get the number of active ags and inacitve ags */
	actags = bmp->db_maxag + 1;
	inactags = bmp->db_numag - actags;
	ag_rem = bmp->db_mapsize & (bmp->db_agsize - 1);	/* ??? */

	/* determine how many blocks are in the inactive allocation
	 * groups. in doing this, we must account for the fact that
	 * the rightmost group might be a partial group (i.e. file
	 * system size is not a multiple of the group size).
	 */
	inactfree = (inactags && ag_rem) ?
	    ((inactags - 1) << bmp->db_agl2size) + ag_rem
	    : inactags << bmp->db_agl2size;

	/* determine how many free blocks are in the active
	 * allocation groups plus the average number of free blocks
	 * within the active ags.
	 */
	actfree = bmp->db_nfree - inactfree;
	avgfree = (u32) actfree / (u32) actags;

	/* if the preferred allocation group has not average free space.
	 * re-establish the preferred group as the leftmost
	 * group with average free space.
	 */
	if (bmp->db_agfree[bmp->db_agpref] < avgfree) {
		for (bmp->db_agpref = 0; bmp->db_agpref < actags;
		     bmp->db_agpref++) {
			if (bmp->db_agfree[bmp->db_agpref] >= avgfree)
				break;
		}
		if (bmp->db_agpref >= bmp->db_numag) {
			jfs_error(ipbmap->i_sb,
				  "cannot find ag with average freespace");
		}
	}

	/*
	 * compute db_aglevel, db_agheigth, db_width, db_agstart:
	 * an ag is covered in aglevel dmapctl summary tree,
	 * at agheight level height (from leaf) with agwidth number of nodes
	 * each, which starts at agstart index node of the smmary tree node
	 * array;
	 */
	bmp->db_aglevel = BMAPSZTOLEV(bmp->db_agsize);
	l2nl =
	    bmp->db_agl2size - (L2BPERDMAP + bmp->db_aglevel * L2LPERCTL);
	bmp->db_agheigth = l2nl >> 1;
	bmp->db_agwidth = 1 << (l2nl - (bmp->db_agheigth << 1));
	/* accumulate the index of the first summary-tree node at the
	 * agheight level (nodes per level grow by a factor of 4).
	 */
	for (i = 5 - bmp->db_agheigth, bmp->db_agstart = 0, n = 1; i > 0;
	     i--) {
		bmp->db_agstart += n;
		n <<= 2;
	}

}
3627 3627
3628 3628
3629 /* 3629 /*
3630 * NAME: dbInitDmap()/ujfs_idmap_page() 3630 * NAME: dbInitDmap()/ujfs_idmap_page()
3631 * 3631 *
3632 * FUNCTION: initialize working/persistent bitmap of the dmap page 3632 * FUNCTION: initialize working/persistent bitmap of the dmap page
3633 * for the specified number of blocks: 3633 * for the specified number of blocks:
3634 * 3634 *
3635 * at entry, the bitmaps had been initialized as free (ZEROS); 3635 * at entry, the bitmaps had been initialized as free (ZEROS);
3636 * The number of blocks will only account for the actually 3636 * The number of blocks will only account for the actually
3637 * existing blocks. Blocks which don't actually exist in 3637 * existing blocks. Blocks which don't actually exist in
3638 * the aggregate will be marked as allocated (ONES); 3638 * the aggregate will be marked as allocated (ONES);
3639 * 3639 *
3640 * PARAMETERS: 3640 * PARAMETERS:
3641 * dp - pointer to page of map 3641 * dp - pointer to page of map
3642 * nblocks - number of blocks this page 3642 * nblocks - number of blocks this page
3643 * 3643 *
3644 * RETURNS: NONE 3644 * RETURNS: NONE
3645 */ 3645 */
3646 static int dbInitDmap(struct dmap * dp, s64 Blkno, int nblocks) 3646 static int dbInitDmap(struct dmap * dp, s64 Blkno, int nblocks)
3647 { 3647 {
3648 int blkno, w, b, r, nw, nb, i; 3648 int blkno, w, b, r, nw, nb, i;
3649 3649
3650 /* starting block number within the dmap */ 3650 /* starting block number within the dmap */
3651 blkno = Blkno & (BPERDMAP - 1); 3651 blkno = Blkno & (BPERDMAP - 1);
3652 3652
3653 if (blkno == 0) { 3653 if (blkno == 0) {
3654 dp->nblocks = dp->nfree = cpu_to_le32(nblocks); 3654 dp->nblocks = dp->nfree = cpu_to_le32(nblocks);
3655 dp->start = cpu_to_le64(Blkno); 3655 dp->start = cpu_to_le64(Blkno);
3656 3656
3657 if (nblocks == BPERDMAP) { 3657 if (nblocks == BPERDMAP) {
3658 memset(&dp->wmap[0], 0, LPERDMAP * 4); 3658 memset(&dp->wmap[0], 0, LPERDMAP * 4);
3659 memset(&dp->pmap[0], 0, LPERDMAP * 4); 3659 memset(&dp->pmap[0], 0, LPERDMAP * 4);
3660 goto initTree; 3660 goto initTree;
3661 } 3661 }
3662 } else { 3662 } else {
3663 dp->nblocks = 3663 le32_add_cpu(&dp->nblocks, nblocks);
3664 cpu_to_le32(le32_to_cpu(dp->nblocks) + nblocks); 3664 le32_add_cpu(&dp->nfree, nblocks);
3665 dp->nfree = cpu_to_le32(le32_to_cpu(dp->nfree) + nblocks);
3666 } 3665 }
3667 3666
3668 /* word number containing start block number */ 3667 /* word number containing start block number */
3669 w = blkno >> L2DBWORD; 3668 w = blkno >> L2DBWORD;
3670 3669
3671 /* 3670 /*
3672 * free the bits corresponding to the block range (ZEROS): 3671 * free the bits corresponding to the block range (ZEROS):
3673 * note: not all bits of the first and last words may be contained 3672 * note: not all bits of the first and last words may be contained
3674 * within the block range. 3673 * within the block range.
3675 */ 3674 */
3676 for (r = nblocks; r > 0; r -= nb, blkno += nb) { 3675 for (r = nblocks; r > 0; r -= nb, blkno += nb) {
3677 /* number of bits preceding range to be freed in the word */ 3676 /* number of bits preceding range to be freed in the word */
3678 b = blkno & (DBWORD - 1); 3677 b = blkno & (DBWORD - 1);
3679 /* number of bits to free in the word */ 3678 /* number of bits to free in the word */
3680 nb = min(r, DBWORD - b); 3679 nb = min(r, DBWORD - b);
3681 3680
3682 /* is partial word to be freed ? */ 3681 /* is partial word to be freed ? */
3683 if (nb < DBWORD) { 3682 if (nb < DBWORD) {
3684 /* free (set to 0) from the bitmap word */ 3683 /* free (set to 0) from the bitmap word */
3685 dp->wmap[w] &= cpu_to_le32(~(ONES << (DBWORD - nb) 3684 dp->wmap[w] &= cpu_to_le32(~(ONES << (DBWORD - nb)
3686 >> b)); 3685 >> b));
3687 dp->pmap[w] &= cpu_to_le32(~(ONES << (DBWORD - nb) 3686 dp->pmap[w] &= cpu_to_le32(~(ONES << (DBWORD - nb)
3688 >> b)); 3687 >> b));
3689 3688
3690 /* skip the word freed */ 3689 /* skip the word freed */
3691 w++; 3690 w++;
3692 } else { 3691 } else {
3693 /* free (set to 0) contiguous bitmap words */ 3692 /* free (set to 0) contiguous bitmap words */
3694 nw = r >> L2DBWORD; 3693 nw = r >> L2DBWORD;
3695 memset(&dp->wmap[w], 0, nw * 4); 3694 memset(&dp->wmap[w], 0, nw * 4);
3696 memset(&dp->pmap[w], 0, nw * 4); 3695 memset(&dp->pmap[w], 0, nw * 4);
3697 3696
3698 /* skip the words freed */ 3697 /* skip the words freed */
3699 nb = nw << L2DBWORD; 3698 nb = nw << L2DBWORD;
3700 w += nw; 3699 w += nw;
3701 } 3700 }
3702 } 3701 }
3703 3702
3704 /* 3703 /*
3705 * mark bits following the range to be freed (non-existing 3704 * mark bits following the range to be freed (non-existing
3706 * blocks) as allocated (ONES) 3705 * blocks) as allocated (ONES)
3707 */ 3706 */
3708 3707
3709 if (blkno == BPERDMAP) 3708 if (blkno == BPERDMAP)
3710 goto initTree; 3709 goto initTree;
3711 3710
3712 /* the first word beyond the end of existing blocks */ 3711 /* the first word beyond the end of existing blocks */
3713 w = blkno >> L2DBWORD; 3712 w = blkno >> L2DBWORD;
3714 3713
3715 /* does nblocks fall on a 32-bit boundary ? */ 3714 /* does nblocks fall on a 32-bit boundary ? */
3716 b = blkno & (DBWORD - 1); 3715 b = blkno & (DBWORD - 1);
3717 if (b) { 3716 if (b) {
3718 /* mark a partial word allocated */ 3717 /* mark a partial word allocated */
3719 dp->wmap[w] = dp->pmap[w] = cpu_to_le32(ONES >> b); 3718 dp->wmap[w] = dp->pmap[w] = cpu_to_le32(ONES >> b);
3720 w++; 3719 w++;
3721 } 3720 }
3722 3721
3723 /* set the rest of the words in the page to allocated (ONES) */ 3722 /* set the rest of the words in the page to allocated (ONES) */
3724 for (i = w; i < LPERDMAP; i++) 3723 for (i = w; i < LPERDMAP; i++)
3725 dp->pmap[i] = dp->wmap[i] = cpu_to_le32(ONES); 3724 dp->pmap[i] = dp->wmap[i] = cpu_to_le32(ONES);
3726 3725
3727 /* 3726 /*
3728 * init tree 3727 * init tree
3729 */ 3728 */
3730 initTree: 3729 initTree:
3731 return (dbInitDmapTree(dp)); 3730 return (dbInitDmapTree(dp));
3732 } 3731 }
3733 3732
3734 3733
3735 /* 3734 /*
3736 * NAME: dbInitDmapTree()/ujfs_complete_dmap() 3735 * NAME: dbInitDmapTree()/ujfs_complete_dmap()
3737 * 3736 *
3738 * FUNCTION: initialize summary tree of the specified dmap: 3737 * FUNCTION: initialize summary tree of the specified dmap:
3739 * 3738 *
3740 * at entry, bitmap of the dmap has been initialized; 3739 * at entry, bitmap of the dmap has been initialized;
3741 * 3740 *
3742 * PARAMETERS: 3741 * PARAMETERS:
3743 * dp - dmap to complete 3742 * dp - dmap to complete
3744 * blkno - starting block number for this dmap 3743 * blkno - starting block number for this dmap
3745 * treemax - will be filled in with max free for this dmap 3744 * treemax - will be filled in with max free for this dmap
3746 * 3745 *
3747 * RETURNS: max free string at the root of the tree 3746 * RETURNS: max free string at the root of the tree
3748 */ 3747 */
3749 static int dbInitDmapTree(struct dmap * dp) 3748 static int dbInitDmapTree(struct dmap * dp)
3750 { 3749 {
3751 struct dmaptree *tp; 3750 struct dmaptree *tp;
3752 s8 *cp; 3751 s8 *cp;
3753 int i; 3752 int i;
3754 3753
3755 /* init fixed info of tree */ 3754 /* init fixed info of tree */
3756 tp = &dp->tree; 3755 tp = &dp->tree;
3757 tp->nleafs = cpu_to_le32(LPERDMAP); 3756 tp->nleafs = cpu_to_le32(LPERDMAP);
3758 tp->l2nleafs = cpu_to_le32(L2LPERDMAP); 3757 tp->l2nleafs = cpu_to_le32(L2LPERDMAP);
3759 tp->leafidx = cpu_to_le32(LEAFIND); 3758 tp->leafidx = cpu_to_le32(LEAFIND);
3760 tp->height = cpu_to_le32(4); 3759 tp->height = cpu_to_le32(4);
3761 tp->budmin = BUDMIN; 3760 tp->budmin = BUDMIN;
3762 3761
3763 /* init each leaf from corresponding wmap word: 3762 /* init each leaf from corresponding wmap word:
3764 * note: leaf is set to NOFREE(-1) if all blocks of corresponding 3763 * note: leaf is set to NOFREE(-1) if all blocks of corresponding
3765 * bitmap word are allocated. 3764 * bitmap word are allocated.
3766 */ 3765 */
3767 cp = tp->stree + le32_to_cpu(tp->leafidx); 3766 cp = tp->stree + le32_to_cpu(tp->leafidx);
3768 for (i = 0; i < LPERDMAP; i++) 3767 for (i = 0; i < LPERDMAP; i++)
3769 *cp++ = dbMaxBud((u8 *) & dp->wmap[i]); 3768 *cp++ = dbMaxBud((u8 *) & dp->wmap[i]);
3770 3769
3771 /* build the dmap's binary buddy summary tree */ 3770 /* build the dmap's binary buddy summary tree */
3772 return (dbInitTree(tp)); 3771 return (dbInitTree(tp));
3773 } 3772 }
3774 3773
3775 3774
3776 /* 3775 /*
3777 * NAME: dbInitTree()/ujfs_adjtree() 3776 * NAME: dbInitTree()/ujfs_adjtree()
3778 * 3777 *
3779 * FUNCTION: initialize binary buddy summary tree of a dmap or dmapctl. 3778 * FUNCTION: initialize binary buddy summary tree of a dmap or dmapctl.
3780 * 3779 *
3781 * at entry, the leaves of the tree has been initialized 3780 * at entry, the leaves of the tree has been initialized
3782 * from corresponding bitmap word or root of summary tree 3781 * from corresponding bitmap word or root of summary tree
3783 * of the child control page; 3782 * of the child control page;
3784 * configure binary buddy system at the leaf level, then 3783 * configure binary buddy system at the leaf level, then
3785 * bubble up the values of the leaf nodes up the tree. 3784 * bubble up the values of the leaf nodes up the tree.
3786 * 3785 *
3787 * PARAMETERS: 3786 * PARAMETERS:
3788 * cp - Pointer to the root of the tree 3787 * cp - Pointer to the root of the tree
3789 * l2leaves- Number of leaf nodes as a power of 2 3788 * l2leaves- Number of leaf nodes as a power of 2
3790 * l2min - Number of blocks that can be covered by a leaf 3789 * l2min - Number of blocks that can be covered by a leaf
3791 * as a power of 2 3790 * as a power of 2
3792 * 3791 *
3793 * RETURNS: max free string at the root of the tree 3792 * RETURNS: max free string at the root of the tree
3794 */ 3793 */
3795 static int dbInitTree(struct dmaptree * dtp) 3794 static int dbInitTree(struct dmaptree * dtp)
3796 { 3795 {
3797 int l2max, l2free, bsize, nextb, i; 3796 int l2max, l2free, bsize, nextb, i;
3798 int child, parent, nparent; 3797 int child, parent, nparent;
3799 s8 *tp, *cp, *cp1; 3798 s8 *tp, *cp, *cp1;
3800 3799
3801 tp = dtp->stree; 3800 tp = dtp->stree;
3802 3801
3803 /* Determine the maximum free string possible for the leaves */ 3802 /* Determine the maximum free string possible for the leaves */
3804 l2max = le32_to_cpu(dtp->l2nleafs) + dtp->budmin; 3803 l2max = le32_to_cpu(dtp->l2nleafs) + dtp->budmin;
3805 3804
3806 /* 3805 /*
3807 * configure the leaf levevl into binary buddy system 3806 * configure the leaf levevl into binary buddy system
3808 * 3807 *
3809 * Try to combine buddies starting with a buddy size of 1 3808 * Try to combine buddies starting with a buddy size of 1
3810 * (i.e. two leaves). At a buddy size of 1 two buddy leaves 3809 * (i.e. two leaves). At a buddy size of 1 two buddy leaves
3811 * can be combined if both buddies have a maximum free of l2min; 3810 * can be combined if both buddies have a maximum free of l2min;
3812 * the combination will result in the left-most buddy leaf having 3811 * the combination will result in the left-most buddy leaf having
3813 * a maximum free of l2min+1. 3812 * a maximum free of l2min+1.
3814 * After processing all buddies for a given size, process buddies 3813 * After processing all buddies for a given size, process buddies
3815 * at the next higher buddy size (i.e. current size * 2) and 3814 * at the next higher buddy size (i.e. current size * 2) and
3816 * the next maximum free (current free + 1). 3815 * the next maximum free (current free + 1).
3817 * This continues until the maximum possible buddy combination 3816 * This continues until the maximum possible buddy combination
3818 * yields maximum free. 3817 * yields maximum free.
3819 */ 3818 */
3820 for (l2free = dtp->budmin, bsize = 1; l2free < l2max; 3819 for (l2free = dtp->budmin, bsize = 1; l2free < l2max;
3821 l2free++, bsize = nextb) { 3820 l2free++, bsize = nextb) {
3822 /* get next buddy size == current buddy pair size */ 3821 /* get next buddy size == current buddy pair size */
3823 nextb = bsize << 1; 3822 nextb = bsize << 1;
3824 3823
3825 /* scan each adjacent buddy pair at current buddy size */ 3824 /* scan each adjacent buddy pair at current buddy size */
3826 for (i = 0, cp = tp + le32_to_cpu(dtp->leafidx); 3825 for (i = 0, cp = tp + le32_to_cpu(dtp->leafidx);
3827 i < le32_to_cpu(dtp->nleafs); 3826 i < le32_to_cpu(dtp->nleafs);
3828 i += nextb, cp += nextb) { 3827 i += nextb, cp += nextb) {
3829 /* coalesce if both adjacent buddies are max free */ 3828 /* coalesce if both adjacent buddies are max free */
3830 if (*cp == l2free && *(cp + bsize) == l2free) { 3829 if (*cp == l2free && *(cp + bsize) == l2free) {
3831 *cp = l2free + 1; /* left take right */ 3830 *cp = l2free + 1; /* left take right */
3832 *(cp + bsize) = -1; /* right give left */ 3831 *(cp + bsize) = -1; /* right give left */
3833 } 3832 }
3834 } 3833 }
3835 } 3834 }
3836 3835
3837 /* 3836 /*
3838 * bubble summary information of leaves up the tree. 3837 * bubble summary information of leaves up the tree.
3839 * 3838 *
3840 * Starting at the leaf node level, the four nodes described by 3839 * Starting at the leaf node level, the four nodes described by
3841 * the higher level parent node are compared for a maximum free and 3840 * the higher level parent node are compared for a maximum free and
3842 * this maximum becomes the value of the parent node. 3841 * this maximum becomes the value of the parent node.
3843 * when all lower level nodes are processed in this fashion then 3842 * when all lower level nodes are processed in this fashion then
3844 * move up to the next level (parent becomes a lower level node) and 3843 * move up to the next level (parent becomes a lower level node) and
3845 * continue the process for that level. 3844 * continue the process for that level.
3846 */ 3845 */
3847 for (child = le32_to_cpu(dtp->leafidx), 3846 for (child = le32_to_cpu(dtp->leafidx),
3848 nparent = le32_to_cpu(dtp->nleafs) >> 2; 3847 nparent = le32_to_cpu(dtp->nleafs) >> 2;
3849 nparent > 0; nparent >>= 2, child = parent) { 3848 nparent > 0; nparent >>= 2, child = parent) {
3850 /* get index of 1st node of parent level */ 3849 /* get index of 1st node of parent level */
3851 parent = (child - 1) >> 2; 3850 parent = (child - 1) >> 2;
3852 3851
3853 /* set the value of the parent node as the maximum 3852 /* set the value of the parent node as the maximum
3854 * of the four nodes of the current level. 3853 * of the four nodes of the current level.
3855 */ 3854 */
3856 for (i = 0, cp = tp + child, cp1 = tp + parent; 3855 for (i = 0, cp = tp + child, cp1 = tp + parent;
3857 i < nparent; i++, cp += 4, cp1++) 3856 i < nparent; i++, cp += 4, cp1++)
3858 *cp1 = TREEMAX(cp); 3857 *cp1 = TREEMAX(cp);
3859 } 3858 }
3860 3859
3861 return (*tp); 3860 return (*tp);
3862 } 3861 }
3863 3862
3864 3863
3865 /* 3864 /*
3866 * dbInitDmapCtl() 3865 * dbInitDmapCtl()
3867 * 3866 *
3868 * function: initialize dmapctl page 3867 * function: initialize dmapctl page
3869 */ 3868 */
3870 static int dbInitDmapCtl(struct dmapctl * dcp, int level, int i) 3869 static int dbInitDmapCtl(struct dmapctl * dcp, int level, int i)
3871 { /* start leaf index not covered by range */ 3870 { /* start leaf index not covered by range */
3872 s8 *cp; 3871 s8 *cp;
3873 3872
3874 dcp->nleafs = cpu_to_le32(LPERCTL); 3873 dcp->nleafs = cpu_to_le32(LPERCTL);
3875 dcp->l2nleafs = cpu_to_le32(L2LPERCTL); 3874 dcp->l2nleafs = cpu_to_le32(L2LPERCTL);
3876 dcp->leafidx = cpu_to_le32(CTLLEAFIND); 3875 dcp->leafidx = cpu_to_le32(CTLLEAFIND);
3877 dcp->height = cpu_to_le32(5); 3876 dcp->height = cpu_to_le32(5);
3878 dcp->budmin = L2BPERDMAP + L2LPERCTL * level; 3877 dcp->budmin = L2BPERDMAP + L2LPERCTL * level;
3879 3878
3880 /* 3879 /*
3881 * initialize the leaves of current level that were not covered 3880 * initialize the leaves of current level that were not covered
3882 * by the specified input block range (i.e. the leaves have no 3881 * by the specified input block range (i.e. the leaves have no
3883 * low level dmapctl or dmap). 3882 * low level dmapctl or dmap).
3884 */ 3883 */
3885 cp = &dcp->stree[CTLLEAFIND + i]; 3884 cp = &dcp->stree[CTLLEAFIND + i];
3886 for (; i < LPERCTL; i++) 3885 for (; i < LPERCTL; i++)
3887 *cp++ = NOFREE; 3886 *cp++ = NOFREE;
3888 3887
3889 /* build the dmap's binary buddy summary tree */ 3888 /* build the dmap's binary buddy summary tree */
3890 return (dbInitTree((struct dmaptree *) dcp)); 3889 return (dbInitTree((struct dmaptree *) dcp));
3891 } 3890 }
3892 3891
3893 3892
3894 /* 3893 /*
3895 * NAME: dbGetL2AGSize()/ujfs_getagl2size() 3894 * NAME: dbGetL2AGSize()/ujfs_getagl2size()
3896 * 3895 *
3897 * FUNCTION: Determine log2(allocation group size) from aggregate size 3896 * FUNCTION: Determine log2(allocation group size) from aggregate size
3898 * 3897 *
3899 * PARAMETERS: 3898 * PARAMETERS:
3900 * nblocks - Number of blocks in aggregate 3899 * nblocks - Number of blocks in aggregate
3901 * 3900 *
3902 * RETURNS: log2(allocation group size) in aggregate blocks 3901 * RETURNS: log2(allocation group size) in aggregate blocks
3903 */ 3902 */
3904 static int dbGetL2AGSize(s64 nblocks) 3903 static int dbGetL2AGSize(s64 nblocks)
3905 { 3904 {
3906 s64 sz; 3905 s64 sz;
3907 s64 m; 3906 s64 m;
3908 int l2sz; 3907 int l2sz;
3909 3908
3910 if (nblocks < BPERDMAP * MAXAG) 3909 if (nblocks < BPERDMAP * MAXAG)
3911 return (L2BPERDMAP); 3910 return (L2BPERDMAP);
3912 3911
3913 /* round up aggregate size to power of 2 */ 3912 /* round up aggregate size to power of 2 */
3914 m = ((u64) 1 << (64 - 1)); 3913 m = ((u64) 1 << (64 - 1));
3915 for (l2sz = 64; l2sz >= 0; l2sz--, m >>= 1) { 3914 for (l2sz = 64; l2sz >= 0; l2sz--, m >>= 1) {
3916 if (m & nblocks) 3915 if (m & nblocks)
3917 break; 3916 break;
3918 } 3917 }
3919 3918
3920 sz = (s64) 1 << l2sz; 3919 sz = (s64) 1 << l2sz;
3921 if (sz < nblocks) 3920 if (sz < nblocks)
3922 l2sz += 1; 3921 l2sz += 1;
3923 3922
3924 /* agsize = roundupSize/max_number_of_ag */ 3923 /* agsize = roundupSize/max_number_of_ag */
3925 return (l2sz - L2MAXAG); 3924 return (l2sz - L2MAXAG);
3926 } 3925 }
3927 3926
3928 3927
3929 /* 3928 /*
3930 * NAME: dbMapFileSizeToMapSize() 3929 * NAME: dbMapFileSizeToMapSize()
3931 * 3930 *
3932 * FUNCTION: compute number of blocks the block allocation map file 3931 * FUNCTION: compute number of blocks the block allocation map file
3933 * can cover from the map file size; 3932 * can cover from the map file size;
3934 * 3933 *
3935 * RETURNS: Number of blocks which can be covered by this block map file; 3934 * RETURNS: Number of blocks which can be covered by this block map file;
3936 */ 3935 */
3937 3936
3938 /* 3937 /*
3939 * maximum number of map pages at each level including control pages 3938 * maximum number of map pages at each level including control pages
3940 */ 3939 */
3941 #define MAXL0PAGES (1 + LPERCTL) 3940 #define MAXL0PAGES (1 + LPERCTL)
3942 #define MAXL1PAGES (1 + LPERCTL * MAXL0PAGES) 3941 #define MAXL1PAGES (1 + LPERCTL * MAXL0PAGES)
3943 #define MAXL2PAGES (1 + LPERCTL * MAXL1PAGES) 3942 #define MAXL2PAGES (1 + LPERCTL * MAXL1PAGES)
3944 3943
3945 /* 3944 /*
3946 * convert number of map pages to the zero origin top dmapctl level 3945 * convert number of map pages to the zero origin top dmapctl level
3947 */ 3946 */
3948 #define BMAPPGTOLEV(npages) \ 3947 #define BMAPPGTOLEV(npages) \
3949 (((npages) <= 3 + MAXL0PAGES) ? 0 : \ 3948 (((npages) <= 3 + MAXL0PAGES) ? 0 : \
3950 ((npages) <= 2 + MAXL1PAGES) ? 1 : 2) 3949 ((npages) <= 2 + MAXL1PAGES) ? 1 : 2)
3951 3950
3952 s64 dbMapFileSizeToMapSize(struct inode * ipbmap) 3951 s64 dbMapFileSizeToMapSize(struct inode * ipbmap)
3953 { 3952 {
3954 struct super_block *sb = ipbmap->i_sb; 3953 struct super_block *sb = ipbmap->i_sb;
3955 s64 nblocks; 3954 s64 nblocks;
3956 s64 npages, ndmaps; 3955 s64 npages, ndmaps;
3957 int level, i; 3956 int level, i;
3958 int complete, factor; 3957 int complete, factor;
3959 3958
3960 nblocks = ipbmap->i_size >> JFS_SBI(sb)->l2bsize; 3959 nblocks = ipbmap->i_size >> JFS_SBI(sb)->l2bsize;
3961 npages = nblocks >> JFS_SBI(sb)->l2nbperpage; 3960 npages = nblocks >> JFS_SBI(sb)->l2nbperpage;
3962 level = BMAPPGTOLEV(npages); 3961 level = BMAPPGTOLEV(npages);
3963 3962
3964 /* At each level, accumulate the number of dmap pages covered by 3963 /* At each level, accumulate the number of dmap pages covered by
3965 * the number of full child levels below it; 3964 * the number of full child levels below it;
3966 * repeat for the last incomplete child level. 3965 * repeat for the last incomplete child level.
3967 */ 3966 */
3968 ndmaps = 0; 3967 ndmaps = 0;
3969 npages--; /* skip the first global control page */ 3968 npages--; /* skip the first global control page */
3970 /* skip higher level control pages above top level covered by map */ 3969 /* skip higher level control pages above top level covered by map */
3971 npages -= (2 - level); 3970 npages -= (2 - level);
3972 npages--; /* skip top level's control page */ 3971 npages--; /* skip top level's control page */
3973 for (i = level; i >= 0; i--) { 3972 for (i = level; i >= 0; i--) {
3974 factor = 3973 factor =
3975 (i == 2) ? MAXL1PAGES : ((i == 1) ? MAXL0PAGES : 1); 3974 (i == 2) ? MAXL1PAGES : ((i == 1) ? MAXL0PAGES : 1);
3976 complete = (u32) npages / factor; 3975 complete = (u32) npages / factor;
3977 ndmaps += complete * ((i == 2) ? LPERCTL * LPERCTL : 3976 ndmaps += complete * ((i == 2) ? LPERCTL * LPERCTL :
3978 ((i == 1) ? LPERCTL : 1)); 3977 ((i == 1) ? LPERCTL : 1));
3979 3978
3980 /* pages in last/incomplete child */ 3979 /* pages in last/incomplete child */
3981 npages = (u32) npages % factor; 3980 npages = (u32) npages % factor;
3982 /* skip incomplete child's level control page */ 3981 /* skip incomplete child's level control page */
3983 npages--; 3982 npages--;
3984 } 3983 }
3985 3984
3986 /* convert the number of dmaps into the number of blocks 3985 /* convert the number of dmaps into the number of blocks
3987 * which can be covered by the dmaps; 3986 * which can be covered by the dmaps;
3988 */ 3987 */
3989 nblocks = ndmaps << L2BPERDMAP; 3988 nblocks = ndmaps << L2BPERDMAP;
3990 3989
3991 return (nblocks); 3990 return (nblocks);
3992 } 3991 }
3993 3992
1 /* 1 /*
2 * Copyright (C) International Business Machines Corp., 2000-2004 2 * Copyright (C) International Business Machines Corp., 2000-2004
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or 6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version. 7 * (at your option) any later version.
8 * 8 *
9 * This program is distributed in the hope that it will be useful, 9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
12 * the GNU General Public License for more details. 12 * the GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */ 17 */
18 18
19 /* 19 /*
20 * jfs_imap.c: inode allocation map manager 20 * jfs_imap.c: inode allocation map manager
21 * 21 *
22 * Serialization: 22 * Serialization:
23 * Each AG has a simple lock which is used to control the serialization of 23 * Each AG has a simple lock which is used to control the serialization of
24 * the AG level lists. This lock should be taken first whenever an AG 24 * the AG level lists. This lock should be taken first whenever an AG
25 * level list will be modified or accessed. 25 * level list will be modified or accessed.
26 * 26 *
27 * Each IAG is locked by obtaining the buffer for the IAG page. 27 * Each IAG is locked by obtaining the buffer for the IAG page.
28 * 28 *
29 * There is also a inode lock for the inode map inode. A read lock needs to 29 * There is also a inode lock for the inode map inode. A read lock needs to
30 * be taken whenever an IAG is read from the map or the global level 30 * be taken whenever an IAG is read from the map or the global level
31 * information is read. A write lock needs to be taken whenever the global 31 * information is read. A write lock needs to be taken whenever the global
32 * level information is modified or an atomic operation needs to be used. 32 * level information is modified or an atomic operation needs to be used.
33 * 33 *
34 * If more than one IAG is read at one time, the read lock may not 34 * If more than one IAG is read at one time, the read lock may not
35 * be given up until all of the IAG's are read. Otherwise, a deadlock 35 * be given up until all of the IAG's are read. Otherwise, a deadlock
36 * may occur when trying to obtain the read lock while another thread 36 * may occur when trying to obtain the read lock while another thread
37 * holding the read lock is waiting on the IAG already being held. 37 * holding the read lock is waiting on the IAG already being held.
38 * 38 *
39 * The control page of the inode map is read into memory by diMount(). 39 * The control page of the inode map is read into memory by diMount().
40 * Thereafter it should only be modified in memory and then it will be 40 * Thereafter it should only be modified in memory and then it will be
41 * written out when the filesystem is unmounted by diUnmount(). 41 * written out when the filesystem is unmounted by diUnmount().
42 */ 42 */
43 43
44 #include <linux/fs.h> 44 #include <linux/fs.h>
45 #include <linux/buffer_head.h> 45 #include <linux/buffer_head.h>
46 #include <linux/pagemap.h> 46 #include <linux/pagemap.h>
47 #include <linux/quotaops.h> 47 #include <linux/quotaops.h>
48 48
49 #include "jfs_incore.h" 49 #include "jfs_incore.h"
50 #include "jfs_inode.h" 50 #include "jfs_inode.h"
51 #include "jfs_filsys.h" 51 #include "jfs_filsys.h"
52 #include "jfs_dinode.h" 52 #include "jfs_dinode.h"
53 #include "jfs_dmap.h" 53 #include "jfs_dmap.h"
54 #include "jfs_imap.h" 54 #include "jfs_imap.h"
55 #include "jfs_metapage.h" 55 #include "jfs_metapage.h"
56 #include "jfs_superblock.h" 56 #include "jfs_superblock.h"
57 #include "jfs_debug.h" 57 #include "jfs_debug.h"
58 58
59 /* 59 /*
60 * __mark_inode_dirty expects inodes to be hashed. Since we don't want 60 * __mark_inode_dirty expects inodes to be hashed. Since we don't want
61 * special inodes in the fileset inode space, we hash them to a dummy head 61 * special inodes in the fileset inode space, we hash them to a dummy head
62 */ 62 */
63 static HLIST_HEAD(aggregate_hash); 63 static HLIST_HEAD(aggregate_hash);
64 64
65 /* 65 /*
66 * imap locks 66 * imap locks
67 */ 67 */
68 /* iag free list lock */ 68 /* iag free list lock */
69 #define IAGFREE_LOCK_INIT(imap) mutex_init(&imap->im_freelock) 69 #define IAGFREE_LOCK_INIT(imap) mutex_init(&imap->im_freelock)
70 #define IAGFREE_LOCK(imap) mutex_lock(&imap->im_freelock) 70 #define IAGFREE_LOCK(imap) mutex_lock(&imap->im_freelock)
71 #define IAGFREE_UNLOCK(imap) mutex_unlock(&imap->im_freelock) 71 #define IAGFREE_UNLOCK(imap) mutex_unlock(&imap->im_freelock)
72 72
73 /* per ag iag list locks */ 73 /* per ag iag list locks */
74 #define AG_LOCK_INIT(imap,index) mutex_init(&(imap->im_aglock[index])) 74 #define AG_LOCK_INIT(imap,index) mutex_init(&(imap->im_aglock[index]))
75 #define AG_LOCK(imap,agno) mutex_lock(&imap->im_aglock[agno]) 75 #define AG_LOCK(imap,agno) mutex_lock(&imap->im_aglock[agno])
76 #define AG_UNLOCK(imap,agno) mutex_unlock(&imap->im_aglock[agno]) 76 #define AG_UNLOCK(imap,agno) mutex_unlock(&imap->im_aglock[agno])
77 77
78 /* 78 /*
79 * forward references 79 * forward references
80 */ 80 */
81 static int diAllocAG(struct inomap *, int, bool, struct inode *); 81 static int diAllocAG(struct inomap *, int, bool, struct inode *);
82 static int diAllocAny(struct inomap *, int, bool, struct inode *); 82 static int diAllocAny(struct inomap *, int, bool, struct inode *);
83 static int diAllocBit(struct inomap *, struct iag *, int); 83 static int diAllocBit(struct inomap *, struct iag *, int);
84 static int diAllocExt(struct inomap *, int, struct inode *); 84 static int diAllocExt(struct inomap *, int, struct inode *);
85 static int diAllocIno(struct inomap *, int, struct inode *); 85 static int diAllocIno(struct inomap *, int, struct inode *);
86 static int diFindFree(u32, int); 86 static int diFindFree(u32, int);
87 static int diNewExt(struct inomap *, struct iag *, int); 87 static int diNewExt(struct inomap *, struct iag *, int);
88 static int diNewIAG(struct inomap *, int *, int, struct metapage **); 88 static int diNewIAG(struct inomap *, int *, int, struct metapage **);
89 static void duplicateIXtree(struct super_block *, s64, int, s64 *); 89 static void duplicateIXtree(struct super_block *, s64, int, s64 *);
90 90
91 static int diIAGRead(struct inomap * imap, int, struct metapage **); 91 static int diIAGRead(struct inomap * imap, int, struct metapage **);
92 static int copy_from_dinode(struct dinode *, struct inode *); 92 static int copy_from_dinode(struct dinode *, struct inode *);
93 static void copy_to_dinode(struct dinode *, struct inode *); 93 static void copy_to_dinode(struct dinode *, struct inode *);
94 94
95 /* 95 /*
96 * NAME: diMount() 96 * NAME: diMount()
97 * 97 *
98 * FUNCTION: initialize the incore inode map control structures for 98 * FUNCTION: initialize the incore inode map control structures for
99 * a fileset or aggregate init time. 99 * a fileset or aggregate init time.
100 * 100 *
101 * the inode map's control structure (dinomap) is 101 * the inode map's control structure (dinomap) is
102 * brought in from disk and placed in virtual memory. 102 * brought in from disk and placed in virtual memory.
103 * 103 *
104 * PARAMETERS: 104 * PARAMETERS:
105 * ipimap - pointer to inode map inode for the aggregate or fileset. 105 * ipimap - pointer to inode map inode for the aggregate or fileset.
106 * 106 *
107 * RETURN VALUES: 107 * RETURN VALUES:
108 * 0 - success 108 * 0 - success
109 * -ENOMEM - insufficient free virtual memory. 109 * -ENOMEM - insufficient free virtual memory.
110 * -EIO - i/o error. 110 * -EIO - i/o error.
111 */ 111 */
int diMount(struct inode *ipimap)
{
	struct inomap *imap;
	struct metapage *mp;
	int index;
	struct dinomap_disk *dinom_le;

	/*
	 * allocate/initialize the in-memory inode map control structure
	 */
	/* allocate the in-memory inode map control structure. */
	imap = kmalloc(sizeof(struct inomap), GFP_KERNEL);
	if (imap == NULL) {
		jfs_err("diMount: kmalloc returned NULL!");
		return -ENOMEM;
	}

	/* read the on-disk inode map control structure. */

	mp = read_metapage(ipimap,
			   IMAPBLKNO << JFS_SBI(ipimap->i_sb)->l2nbperpage,
			   PSIZE, 0);
	if (mp == NULL) {
		/* don't leak the control structure on I/O failure */
		kfree(imap);
		return -EIO;
	}

	/* copy the on-disk version to the in-memory version.
	 * all on-disk fields are little-endian and converted to
	 * CPU byte order here, once, at mount time.
	 */
	dinom_le = (struct dinomap_disk *) mp->data;
	imap->im_freeiag = le32_to_cpu(dinom_le->in_freeiag);
	imap->im_nextiag = le32_to_cpu(dinom_le->in_nextiag);
	atomic_set(&imap->im_numinos, le32_to_cpu(dinom_le->in_numinos));
	atomic_set(&imap->im_numfree, le32_to_cpu(dinom_le->in_numfree));
	imap->im_nbperiext = le32_to_cpu(dinom_le->in_nbperiext);
	imap->im_l2nbperiext = le32_to_cpu(dinom_le->in_l2nbperiext);
	for (index = 0; index < MAXAG; index++) {
		imap->im_agctl[index].inofree =
		    le32_to_cpu(dinom_le->in_agctl[index].inofree);
		imap->im_agctl[index].extfree =
		    le32_to_cpu(dinom_le->in_agctl[index].extfree);
		imap->im_agctl[index].numinos =
		    le32_to_cpu(dinom_le->in_agctl[index].numinos);
		imap->im_agctl[index].numfree =
		    le32_to_cpu(dinom_le->in_agctl[index].numfree);
	}

	/* release the buffer. */
	release_metapage(mp);

	/*
	 * allocate/initialize inode allocation map locks
	 */
	/* allocate and init iag free list lock */
	IAGFREE_LOCK_INIT(imap);

	/* allocate and init ag list locks */
	for (index = 0; index < MAXAG; index++) {
		AG_LOCK_INIT(imap, index);
	}

	/* bind the inode map inode and inode map control structure
	 * to each other.
	 */
	imap->im_ipimap = ipimap;
	JFS_IP(ipimap)->i_imap = imap;

	return (0);
}
180 180
181 181
182 /* 182 /*
183 * NAME: diUnmount() 183 * NAME: diUnmount()
184 * 184 *
185 * FUNCTION: write to disk the incore inode map control structures for 185 * FUNCTION: write to disk the incore inode map control structures for
186 * a fileset or aggregate at unmount time. 186 * a fileset or aggregate at unmount time.
187 * 187 *
188 * PARAMETERS: 188 * PARAMETERS:
189 * ipimap - pointer to inode map inode for the aggregate or fileset. 189 * ipimap - pointer to inode map inode for the aggregate or fileset.
190 * 190 *
191 * RETURN VALUES: 191 * RETURN VALUES:
192 * 0 - success 192 * 0 - success
193 * -ENOMEM - insufficient free virtual memory. 193 * -ENOMEM - insufficient free virtual memory.
194 * -EIO - i/o error. 194 * -EIO - i/o error.
195 */ 195 */
196 int diUnmount(struct inode *ipimap, int mounterror) 196 int diUnmount(struct inode *ipimap, int mounterror)
197 { 197 {
198 struct inomap *imap = JFS_IP(ipimap)->i_imap; 198 struct inomap *imap = JFS_IP(ipimap)->i_imap;
199 199
200 /* 200 /*
201 * update the on-disk inode map control structure 201 * update the on-disk inode map control structure
202 */ 202 */
203 203
204 if (!(mounterror || isReadOnly(ipimap))) 204 if (!(mounterror || isReadOnly(ipimap)))
205 diSync(ipimap); 205 diSync(ipimap);
206 206
207 /* 207 /*
208 * Invalidate the page cache buffers 208 * Invalidate the page cache buffers
209 */ 209 */
210 truncate_inode_pages(ipimap->i_mapping, 0); 210 truncate_inode_pages(ipimap->i_mapping, 0);
211 211
212 /* 212 /*
213 * free in-memory control structure 213 * free in-memory control structure
214 */ 214 */
215 kfree(imap); 215 kfree(imap);
216 216
217 return (0); 217 return (0);
218 } 218 }
219 219
220 220
221 /* 221 /*
222 * diSync() 222 * diSync()
223 */ 223 */
int diSync(struct inode *ipimap)
{
	struct dinomap_disk *dinom_le;
	struct inomap *imp = JFS_IP(ipimap)->i_imap;
	struct metapage *mp;
	int index;

	/*
	 * write imap global control page
	 */
	/* read the on-disk inode map control structure */
	mp = get_metapage(ipimap,
			  IMAPBLKNO << JFS_SBI(ipimap->i_sb)->l2nbperpage,
			  PSIZE, 0);
	if (mp == NULL) {
		jfs_err("diSync: get_metapage failed!");
		return -EIO;
	}

	/* copy the in-memory version to the on-disk version;
	 * mirror image of the le32_to_cpu() conversions done at mount.
	 */
	dinom_le = (struct dinomap_disk *) mp->data;
	dinom_le->in_freeiag = cpu_to_le32(imp->im_freeiag);
	dinom_le->in_nextiag = cpu_to_le32(imp->im_nextiag);
	dinom_le->in_numinos = cpu_to_le32(atomic_read(&imp->im_numinos));
	dinom_le->in_numfree = cpu_to_le32(atomic_read(&imp->im_numfree));
	dinom_le->in_nbperiext = cpu_to_le32(imp->im_nbperiext);
	dinom_le->in_l2nbperiext = cpu_to_le32(imp->im_l2nbperiext);
	for (index = 0; index < MAXAG; index++) {
		dinom_le->in_agctl[index].inofree =
		    cpu_to_le32(imp->im_agctl[index].inofree);
		dinom_le->in_agctl[index].extfree =
		    cpu_to_le32(imp->im_agctl[index].extfree);
		dinom_le->in_agctl[index].numinos =
		    cpu_to_le32(imp->im_agctl[index].numinos);
		dinom_le->in_agctl[index].numfree =
		    cpu_to_le32(imp->im_agctl[index].numfree);
	}

	/* write out the control structure */
	write_metapage(mp);

	/*
	 * write out dirty pages of imap
	 */
	filemap_write_and_wait(ipimap->i_mapping);

	/* finally persist the map inode itself (primary AIT copy) */
	diWriteSpecial(ipimap, 0);

	return (0);
}
274 274
275 275
276 /* 276 /*
277 * NAME: diRead() 277 * NAME: diRead()
278 * 278 *
279 * FUNCTION: initialize an incore inode from disk. 279 * FUNCTION: initialize an incore inode from disk.
280 * 280 *
281 * on entry, the specifed incore inode should itself 281 * on entry, the specifed incore inode should itself
282 * specify the disk inode number corresponding to the 282 * specify the disk inode number corresponding to the
283 * incore inode (i.e. i_number should be initialized). 283 * incore inode (i.e. i_number should be initialized).
284 * 284 *
285 * this routine handles incore inode initialization for 285 * this routine handles incore inode initialization for
286 * both "special" and "regular" inodes. special inodes 286 * both "special" and "regular" inodes. special inodes
287 * are those required early in the mount process and 287 * are those required early in the mount process and
288 * require special handling since much of the file system 288 * require special handling since much of the file system
289 * is not yet initialized. these "special" inodes are 289 * is not yet initialized. these "special" inodes are
290 * identified by a NULL inode map inode pointer and are 290 * identified by a NULL inode map inode pointer and are
291 * actually initialized by a call to diReadSpecial(). 291 * actually initialized by a call to diReadSpecial().
292 * 292 *
293 * for regular inodes, the iag describing the disk inode 293 * for regular inodes, the iag describing the disk inode
294 * is read from disk to determine the inode extent address 294 * is read from disk to determine the inode extent address
295 * for the disk inode. with the inode extent address in 295 * for the disk inode. with the inode extent address in
296 * hand, the page of the extent that contains the disk 296 * hand, the page of the extent that contains the disk
297 * inode is read and the disk inode is copied to the 297 * inode is read and the disk inode is copied to the
298 * incore inode. 298 * incore inode.
299 * 299 *
300 * PARAMETERS: 300 * PARAMETERS:
301 * ip - pointer to incore inode to be initialized from disk. 301 * ip - pointer to incore inode to be initialized from disk.
302 * 302 *
303 * RETURN VALUES: 303 * RETURN VALUES:
304 * 0 - success 304 * 0 - success
305 * -EIO - i/o error. 305 * -EIO - i/o error.
306 * -ENOMEM - insufficient memory 306 * -ENOMEM - insufficient memory
307 * 307 *
308 */ 308 */
int diRead(struct inode *ip)
{
	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
	int iagno, ino, extno, rc;
	struct inode *ipimap;
	struct dinode *dp;
	struct iag *iagp;
	struct metapage *mp;
	s64 blkno, agstart;
	struct inomap *imap;
	int block_offset;
	int inodes_left;
	unsigned long pageno;
	int rel_inode;

	jfs_info("diRead: ino = %ld", ip->i_ino);

	ipimap = sbi->ipimap;
	JFS_IP(ip)->ipimap = ipimap;

	/* determine the iag number for this inode (number) */
	iagno = INOTOIAG(ip->i_ino);

	/* read the iag; the read lock only covers the lookup, the
	 * metapage itself remains valid after the unlock.
	 */
	imap = JFS_IP(ipimap)->i_imap;
	IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
	rc = diIAGRead(imap, iagno, &mp);
	IREAD_UNLOCK(ipimap);
	if (rc) {
		jfs_err("diRead: diIAGRead returned %d", rc);
		return (rc);
	}

	iagp = (struct iag *) mp->data;

	/* determine inode extent that holds the disk inode */
	ino = ip->i_ino & (INOSPERIAG - 1);
	extno = ino >> L2INOSPEREXT;

	/* a zero-address or wrong-length extent means the inode number
	 * no longer refers to an allocated inode: report it stale.
	 */
	if ((lengthPXD(&iagp->inoext[extno]) != imap->im_nbperiext) ||
	    (addressPXD(&iagp->inoext[extno]) == 0)) {
		release_metapage(mp);
		return -ESTALE;
	}

	/* get disk block number of the page within the inode extent
	 * that holds the disk inode.
	 */
	blkno = INOPBLK(&iagp->inoext[extno], ino, sbi->l2nbperpage);

	/* get the ag for the iag */
	agstart = le64_to_cpu(iagp->agstart);

	release_metapage(mp);

	rel_inode = (ino & (INOSPERPAGE - 1));
	pageno = blkno >> sbi->l2nbperpage;

	if ((block_offset = ((u32) blkno & (sbi->nbperpage - 1)))) {
		/*
		 * OS/2 didn't always align inode extents on page boundaries,
		 * so the target inode may actually live on the next page;
		 * adjust pageno/rel_inode accordingly.
		 */
		inodes_left =
		     (sbi->nbperpage - block_offset) << sbi->l2niperblk;

		if (rel_inode < inodes_left)
			rel_inode += block_offset << sbi->l2niperblk;
		else {
			pageno += 1;
			rel_inode -= inodes_left;
		}
	}

	/* read the page of disk inode */
	mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1);
	if (!mp) {
		jfs_err("diRead: read_metapage failed");
		return -EIO;
	}

	/* locate the disk inode requested */
	dp = (struct dinode *) mp->data;
	dp += rel_inode;

	/* sanity-check the on-disk inode before trusting its contents */
	if (ip->i_ino != le32_to_cpu(dp->di_number)) {
		jfs_error(ip->i_sb, "diRead: i_ino != di_number");
		rc = -EIO;
	} else if (le32_to_cpu(dp->di_nlink) == 0)
		rc = -ESTALE;
	else
		/* copy the disk inode to the in-memory inode */
		rc = copy_from_dinode(dp, ip);

	release_metapage(mp);

	/* set the ag for the inode */
	JFS_IP(ip)->agno = BLKTOAG(agstart, sbi);
	JFS_IP(ip)->active_ag = -1;

	return (rc);
}
410 410
411 411
412 /* 412 /*
413 * NAME: diReadSpecial() 413 * NAME: diReadSpecial()
414 * 414 *
415 * FUNCTION: initialize a 'special' inode from disk. 415 * FUNCTION: initialize a 'special' inode from disk.
416 * 416 *
417 * this routines handles aggregate level inodes. The 417 * this routines handles aggregate level inodes. The
418 * inode cache cannot differentiate between the 418 * inode cache cannot differentiate between the
419 * aggregate inodes and the filesystem inodes, so we 419 * aggregate inodes and the filesystem inodes, so we
420 * handle these here. We don't actually use the aggregate 420 * handle these here. We don't actually use the aggregate
421 * inode map, since these inodes are at a fixed location 421 * inode map, since these inodes are at a fixed location
422 * and in some cases the aggregate inode map isn't initialized 422 * and in some cases the aggregate inode map isn't initialized
423 * yet. 423 * yet.
424 * 424 *
425 * PARAMETERS: 425 * PARAMETERS:
426 * sb - filesystem superblock 426 * sb - filesystem superblock
427 * inum - aggregate inode number 427 * inum - aggregate inode number
428 * secondary - 1 if secondary aggregate inode table 428 * secondary - 1 if secondary aggregate inode table
429 * 429 *
430 * RETURN VALUES: 430 * RETURN VALUES:
431 * new inode - success 431 * new inode - success
432 * NULL - i/o error. 432 * NULL - i/o error.
433 */ 433 */
struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
{
	struct jfs_sb_info *sbi = JFS_SBI(sb);
	uint address;
	struct dinode *dp;
	struct inode *ip;
	struct metapage *mp;

	ip = new_inode(sb);
	if (ip == NULL) {
		jfs_err("diReadSpecial: new_inode returned NULL!");
		return ip;
	}

	/* pick the fixed on-disk location of the aggregate inode table:
	 * either the secondary AIT (address from the superblock's ait2
	 * pxd) or the primary one at its well-known offset.
	 */
	if (secondary) {
		address = addressPXD(&sbi->ait2) >> sbi->l2nbperpage;
		JFS_IP(ip)->ipimap = sbi->ipaimap2;
	} else {
		address = AITBL_OFF >> L2PSIZE;
		JFS_IP(ip)->ipimap = sbi->ipaimap;
	}

	ASSERT(inum < INOSPEREXT);

	ip->i_ino = inum;

	address += inum >> 3;	/* 8 inodes per 4K page */

	/* read the page of fixed disk inode (AIT) in raw mode */
	mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1);
	if (mp == NULL) {
		ip->i_nlink = 1;	/* Don't want iput() deleting it */
		iput(ip);
		return (NULL);
	}

	/* get the pointer to the disk inode of interest */
	dp = (struct dinode *) (mp->data);
	dp += inum % 8;		/* 8 inodes per 4K page */

	/* copy on-disk inode to in-memory inode */
	if ((copy_from_dinode(dp, ip)) != 0) {
		/* handle bad return by returning NULL for ip */
		ip->i_nlink = 1;	/* Don't want iput() deleting it */
		iput(ip);
		/* release the page */
		release_metapage(mp);
		return (NULL);

	}

	ip->i_mapping->a_ops = &jfs_metapage_aops;
	mapping_set_gfp_mask(ip->i_mapping, GFP_NOFS);

	/* Allocations to metadata inodes should not affect quotas */
	ip->i_flags |= S_NOQUOTA;

	/* the primary fileset inode carries aggregate-wide generation
	 * numbers; cache them in the superblock info.
	 */
	if ((inum == FILESYSTEM_I) && (JFS_IP(ip)->ipimap == sbi->ipaimap)) {
		sbi->gengen = le32_to_cpu(dp->di_gengen);
		sbi->inostamp = le32_to_cpu(dp->di_inostamp);
	}

	/* release the page */
	release_metapage(mp);

	/* special inodes live on a private hash, not the inode cache */
	hlist_add_head(&ip->i_hash, &aggregate_hash);

	return (ip);
}
503 503
504 /* 504 /*
505 * NAME: diWriteSpecial() 505 * NAME: diWriteSpecial()
506 * 506 *
507 * FUNCTION: Write the special inode to disk 507 * FUNCTION: Write the special inode to disk
508 * 508 *
509 * PARAMETERS: 509 * PARAMETERS:
510 * ip - special inode 510 * ip - special inode
511 * secondary - 1 if secondary aggregate inode table 511 * secondary - 1 if secondary aggregate inode table
512 * 512 *
513 * RETURN VALUES: none 513 * RETURN VALUES: none
514 */ 514 */
515 515
void diWriteSpecial(struct inode *ip, int secondary)
{
	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
	uint address;
	struct dinode *dp;
	ino_t inum = ip->i_ino;
	struct metapage *mp;

	/* locate the fixed AIT copy being updated (primary/secondary),
	 * mirroring the address computation in diReadSpecial().
	 */
	if (secondary)
		address = addressPXD(&sbi->ait2) >> sbi->l2nbperpage;
	else
		address = AITBL_OFF >> L2PSIZE;

	ASSERT(inum < INOSPEREXT);

	address += inum >> 3;	/* 8 inodes per 4K page */

	/* read the page of fixed disk inode (AIT) in raw mode */
	mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1);
	if (mp == NULL) {
		jfs_err("diWriteSpecial: failed to read aggregate inode "
			"extent!");
		return;
	}

	/* get the pointer to the disk inode of interest */
	dp = (struct dinode *) (mp->data);
	dp += inum % 8;		/* 8 inodes per 4K page */

	/* copy in-memory inode to on-disk inode, including the xtree
	 * root (288 presumably matches the on-disk xtroot size --
	 * TODO confirm against struct dinode layout).
	 */
	copy_to_dinode(dp, ip);
	memcpy(&dp->di_xtroot, &JFS_IP(ip)->i_xtroot, 288);

	if (inum == FILESYSTEM_I)
		dp->di_gengen = cpu_to_le32(sbi->gengen);

	/* write the page */
	write_metapage(mp);
}
555 555
556 /* 556 /*
557 * NAME: diFreeSpecial() 557 * NAME: diFreeSpecial()
558 * 558 *
559 * FUNCTION: Free allocated space for special inode 559 * FUNCTION: Free allocated space for special inode
560 */ 560 */
561 void diFreeSpecial(struct inode *ip) 561 void diFreeSpecial(struct inode *ip)
562 { 562 {
563 if (ip == NULL) { 563 if (ip == NULL) {
564 jfs_err("diFreeSpecial called with NULL ip!"); 564 jfs_err("diFreeSpecial called with NULL ip!");
565 return; 565 return;
566 } 566 }
567 filemap_write_and_wait(ip->i_mapping); 567 filemap_write_and_wait(ip->i_mapping);
568 truncate_inode_pages(ip->i_mapping, 0); 568 truncate_inode_pages(ip->i_mapping, 0);
569 iput(ip); 569 iput(ip);
570 } 570 }
571 571
572 572
573 573
574 /* 574 /*
575 * NAME: diWrite() 575 * NAME: diWrite()
576 * 576 *
577 * FUNCTION: write the on-disk inode portion of the in-memory inode 577 * FUNCTION: write the on-disk inode portion of the in-memory inode
578 * to its corresponding on-disk inode. 578 * to its corresponding on-disk inode.
579 * 579 *
580 * on entry, the specifed incore inode should itself 580 * on entry, the specifed incore inode should itself
581 * specify the disk inode number corresponding to the 581 * specify the disk inode number corresponding to the
582 * incore inode (i.e. i_number should be initialized). 582 * incore inode (i.e. i_number should be initialized).
583 * 583 *
584 * the inode contains the inode extent address for the disk 584 * the inode contains the inode extent address for the disk
585 * inode. with the inode extent address in hand, the 585 * inode. with the inode extent address in hand, the
586 * page of the extent that contains the disk inode is 586 * page of the extent that contains the disk inode is
587 * read and the disk inode portion of the incore inode 587 * read and the disk inode portion of the incore inode
588 * is copied to the disk inode. 588 * is copied to the disk inode.
589 * 589 *
590 * PARAMETERS: 590 * PARAMETERS:
591 * tid - transacation id 591 * tid - transacation id
592 * ip - pointer to incore inode to be written to the inode extent. 592 * ip - pointer to incore inode to be written to the inode extent.
593 * 593 *
594 * RETURN VALUES: 594 * RETURN VALUES:
595 * 0 - success 595 * 0 - success
596 * -EIO - i/o error. 596 * -EIO - i/o error.
597 */ 597 */
598 int diWrite(tid_t tid, struct inode *ip) 598 int diWrite(tid_t tid, struct inode *ip)
599 { 599 {
600 struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); 600 struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
601 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 601 struct jfs_inode_info *jfs_ip = JFS_IP(ip);
602 int rc = 0; 602 int rc = 0;
603 s32 ino; 603 s32 ino;
604 struct dinode *dp; 604 struct dinode *dp;
605 s64 blkno; 605 s64 blkno;
606 int block_offset; 606 int block_offset;
607 int inodes_left; 607 int inodes_left;
608 struct metapage *mp; 608 struct metapage *mp;
609 unsigned long pageno; 609 unsigned long pageno;
610 int rel_inode; 610 int rel_inode;
611 int dioffset; 611 int dioffset;
612 struct inode *ipimap; 612 struct inode *ipimap;
613 uint type; 613 uint type;
614 lid_t lid; 614 lid_t lid;
615 struct tlock *ditlck, *tlck; 615 struct tlock *ditlck, *tlck;
616 struct linelock *dilinelock, *ilinelock; 616 struct linelock *dilinelock, *ilinelock;
617 struct lv *lv; 617 struct lv *lv;
618 int n; 618 int n;
619 619
620 ipimap = jfs_ip->ipimap; 620 ipimap = jfs_ip->ipimap;
621 621
622 ino = ip->i_ino & (INOSPERIAG - 1); 622 ino = ip->i_ino & (INOSPERIAG - 1);
623 623
624 if (!addressPXD(&(jfs_ip->ixpxd)) || 624 if (!addressPXD(&(jfs_ip->ixpxd)) ||
625 (lengthPXD(&(jfs_ip->ixpxd)) != 625 (lengthPXD(&(jfs_ip->ixpxd)) !=
626 JFS_IP(ipimap)->i_imap->im_nbperiext)) { 626 JFS_IP(ipimap)->i_imap->im_nbperiext)) {
627 jfs_error(ip->i_sb, "diWrite: ixpxd invalid"); 627 jfs_error(ip->i_sb, "diWrite: ixpxd invalid");
628 return -EIO; 628 return -EIO;
629 } 629 }
630 630
631 /* 631 /*
632 * read the page of disk inode containing the specified inode: 632 * read the page of disk inode containing the specified inode:
633 */ 633 */
634 /* compute the block address of the page */ 634 /* compute the block address of the page */
635 blkno = INOPBLK(&(jfs_ip->ixpxd), ino, sbi->l2nbperpage); 635 blkno = INOPBLK(&(jfs_ip->ixpxd), ino, sbi->l2nbperpage);
636 636
637 rel_inode = (ino & (INOSPERPAGE - 1)); 637 rel_inode = (ino & (INOSPERPAGE - 1));
638 pageno = blkno >> sbi->l2nbperpage; 638 pageno = blkno >> sbi->l2nbperpage;
639 639
640 if ((block_offset = ((u32) blkno & (sbi->nbperpage - 1)))) { 640 if ((block_offset = ((u32) blkno & (sbi->nbperpage - 1)))) {
641 /* 641 /*
642 * OS/2 didn't always align inode extents on page boundaries 642 * OS/2 didn't always align inode extents on page boundaries
643 */ 643 */
644 inodes_left = 644 inodes_left =
645 (sbi->nbperpage - block_offset) << sbi->l2niperblk; 645 (sbi->nbperpage - block_offset) << sbi->l2niperblk;
646 646
647 if (rel_inode < inodes_left) 647 if (rel_inode < inodes_left)
648 rel_inode += block_offset << sbi->l2niperblk; 648 rel_inode += block_offset << sbi->l2niperblk;
649 else { 649 else {
650 pageno += 1; 650 pageno += 1;
651 rel_inode -= inodes_left; 651 rel_inode -= inodes_left;
652 } 652 }
653 } 653 }
654 /* read the page of disk inode */ 654 /* read the page of disk inode */
655 retry: 655 retry:
656 mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1); 656 mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1);
657 if (!mp) 657 if (!mp)
658 return -EIO; 658 return -EIO;
659 659
660 /* get the pointer to the disk inode */ 660 /* get the pointer to the disk inode */
661 dp = (struct dinode *) mp->data; 661 dp = (struct dinode *) mp->data;
662 dp += rel_inode; 662 dp += rel_inode;
663 663
664 dioffset = (ino & (INOSPERPAGE - 1)) << L2DISIZE; 664 dioffset = (ino & (INOSPERPAGE - 1)) << L2DISIZE;
665 665
666 /* 666 /*
667 * acquire transaction lock on the on-disk inode; 667 * acquire transaction lock on the on-disk inode;
668 * N.B. tlock is acquired on ipimap not ip; 668 * N.B. tlock is acquired on ipimap not ip;
669 */ 669 */
670 if ((ditlck = 670 if ((ditlck =
671 txLock(tid, ipimap, mp, tlckINODE | tlckENTRY)) == NULL) 671 txLock(tid, ipimap, mp, tlckINODE | tlckENTRY)) == NULL)
672 goto retry; 672 goto retry;
673 dilinelock = (struct linelock *) & ditlck->lock; 673 dilinelock = (struct linelock *) & ditlck->lock;
674 674
675 /* 675 /*
676 * copy btree root from in-memory inode to on-disk inode 676 * copy btree root from in-memory inode to on-disk inode
677 * 677 *
678 * (tlock is taken from inline B+-tree root in in-memory 678 * (tlock is taken from inline B+-tree root in in-memory
679 * inode when the B+-tree root is updated, which is pointed 679 * inode when the B+-tree root is updated, which is pointed
680 * by jfs_ip->blid as well as being on tx tlock list) 680 * by jfs_ip->blid as well as being on tx tlock list)
681 * 681 *
682 * further processing of btree root is based on the copy 682 * further processing of btree root is based on the copy
683 * in in-memory inode, where txLog() will log from, and, 683 * in in-memory inode, where txLog() will log from, and,
684 * for xtree root, txUpdateMap() will update map and reset 684 * for xtree root, txUpdateMap() will update map and reset
685 * XAD_NEW bit; 685 * XAD_NEW bit;
686 */ 686 */
687 687
688 if (S_ISDIR(ip->i_mode) && (lid = jfs_ip->xtlid)) { 688 if (S_ISDIR(ip->i_mode) && (lid = jfs_ip->xtlid)) {
689 /* 689 /*
690 * This is the special xtree inside the directory for storing 690 * This is the special xtree inside the directory for storing
691 * the directory table 691 * the directory table
692 */ 692 */
693 xtpage_t *p, *xp; 693 xtpage_t *p, *xp;
694 xad_t *xad; 694 xad_t *xad;
695 695
696 jfs_ip->xtlid = 0; 696 jfs_ip->xtlid = 0;
697 tlck = lid_to_tlock(lid); 697 tlck = lid_to_tlock(lid);
698 assert(tlck->type & tlckXTREE); 698 assert(tlck->type & tlckXTREE);
699 tlck->type |= tlckBTROOT; 699 tlck->type |= tlckBTROOT;
700 tlck->mp = mp; 700 tlck->mp = mp;
701 ilinelock = (struct linelock *) & tlck->lock; 701 ilinelock = (struct linelock *) & tlck->lock;
702 702
703 /* 703 /*
704 * copy xtree root from inode to dinode: 704 * copy xtree root from inode to dinode:
705 */ 705 */
706 p = &jfs_ip->i_xtroot; 706 p = &jfs_ip->i_xtroot;
707 xp = (xtpage_t *) &dp->di_dirtable; 707 xp = (xtpage_t *) &dp->di_dirtable;
708 lv = ilinelock->lv; 708 lv = ilinelock->lv;
709 for (n = 0; n < ilinelock->index; n++, lv++) { 709 for (n = 0; n < ilinelock->index; n++, lv++) {
710 memcpy(&xp->xad[lv->offset], &p->xad[lv->offset], 710 memcpy(&xp->xad[lv->offset], &p->xad[lv->offset],
711 lv->length << L2XTSLOTSIZE); 711 lv->length << L2XTSLOTSIZE);
712 } 712 }
713 713
714 /* reset on-disk (metadata page) xtree XAD_NEW bit */ 714 /* reset on-disk (metadata page) xtree XAD_NEW bit */
715 xad = &xp->xad[XTENTRYSTART]; 715 xad = &xp->xad[XTENTRYSTART];
716 for (n = XTENTRYSTART; 716 for (n = XTENTRYSTART;
717 n < le16_to_cpu(xp->header.nextindex); n++, xad++) 717 n < le16_to_cpu(xp->header.nextindex); n++, xad++)
718 if (xad->flag & (XAD_NEW | XAD_EXTENDED)) 718 if (xad->flag & (XAD_NEW | XAD_EXTENDED))
719 xad->flag &= ~(XAD_NEW | XAD_EXTENDED); 719 xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
720 } 720 }
721 721
722 if ((lid = jfs_ip->blid) == 0) 722 if ((lid = jfs_ip->blid) == 0)
723 goto inlineData; 723 goto inlineData;
724 jfs_ip->blid = 0; 724 jfs_ip->blid = 0;
725 725
726 tlck = lid_to_tlock(lid); 726 tlck = lid_to_tlock(lid);
727 type = tlck->type; 727 type = tlck->type;
728 tlck->type |= tlckBTROOT; 728 tlck->type |= tlckBTROOT;
729 tlck->mp = mp; 729 tlck->mp = mp;
730 ilinelock = (struct linelock *) & tlck->lock; 730 ilinelock = (struct linelock *) & tlck->lock;
731 731
732 /* 732 /*
733 * regular file: 16 byte (XAD slot) granularity 733 * regular file: 16 byte (XAD slot) granularity
734 */ 734 */
735 if (type & tlckXTREE) { 735 if (type & tlckXTREE) {
736 xtpage_t *p, *xp; 736 xtpage_t *p, *xp;
737 xad_t *xad; 737 xad_t *xad;
738 738
739 /* 739 /*
740 * copy xtree root from inode to dinode: 740 * copy xtree root from inode to dinode:
741 */ 741 */
742 p = &jfs_ip->i_xtroot; 742 p = &jfs_ip->i_xtroot;
743 xp = &dp->di_xtroot; 743 xp = &dp->di_xtroot;
744 lv = ilinelock->lv; 744 lv = ilinelock->lv;
745 for (n = 0; n < ilinelock->index; n++, lv++) { 745 for (n = 0; n < ilinelock->index; n++, lv++) {
746 memcpy(&xp->xad[lv->offset], &p->xad[lv->offset], 746 memcpy(&xp->xad[lv->offset], &p->xad[lv->offset],
747 lv->length << L2XTSLOTSIZE); 747 lv->length << L2XTSLOTSIZE);
748 } 748 }
749 749
750 /* reset on-disk (metadata page) xtree XAD_NEW bit */ 750 /* reset on-disk (metadata page) xtree XAD_NEW bit */
751 xad = &xp->xad[XTENTRYSTART]; 751 xad = &xp->xad[XTENTRYSTART];
752 for (n = XTENTRYSTART; 752 for (n = XTENTRYSTART;
753 n < le16_to_cpu(xp->header.nextindex); n++, xad++) 753 n < le16_to_cpu(xp->header.nextindex); n++, xad++)
754 if (xad->flag & (XAD_NEW | XAD_EXTENDED)) 754 if (xad->flag & (XAD_NEW | XAD_EXTENDED))
755 xad->flag &= ~(XAD_NEW | XAD_EXTENDED); 755 xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
756 } 756 }
757 /* 757 /*
758 * directory: 32 byte (directory entry slot) granularity 758 * directory: 32 byte (directory entry slot) granularity
759 */ 759 */
760 else if (type & tlckDTREE) { 760 else if (type & tlckDTREE) {
761 dtpage_t *p, *xp; 761 dtpage_t *p, *xp;
762 762
763 /* 763 /*
764 * copy dtree root from inode to dinode: 764 * copy dtree root from inode to dinode:
765 */ 765 */
766 p = (dtpage_t *) &jfs_ip->i_dtroot; 766 p = (dtpage_t *) &jfs_ip->i_dtroot;
767 xp = (dtpage_t *) & dp->di_dtroot; 767 xp = (dtpage_t *) & dp->di_dtroot;
768 lv = ilinelock->lv; 768 lv = ilinelock->lv;
769 for (n = 0; n < ilinelock->index; n++, lv++) { 769 for (n = 0; n < ilinelock->index; n++, lv++) {
770 memcpy(&xp->slot[lv->offset], &p->slot[lv->offset], 770 memcpy(&xp->slot[lv->offset], &p->slot[lv->offset],
771 lv->length << L2DTSLOTSIZE); 771 lv->length << L2DTSLOTSIZE);
772 } 772 }
773 } else { 773 } else {
774 jfs_err("diWrite: UFO tlock"); 774 jfs_err("diWrite: UFO tlock");
775 } 775 }
776 776
777 inlineData: 777 inlineData:
778 /* 778 /*
779 * copy inline symlink from in-memory inode to on-disk inode 779 * copy inline symlink from in-memory inode to on-disk inode
780 */ 780 */
781 if (S_ISLNK(ip->i_mode) && ip->i_size < IDATASIZE) { 781 if (S_ISLNK(ip->i_mode) && ip->i_size < IDATASIZE) {
782 lv = & dilinelock->lv[dilinelock->index]; 782 lv = & dilinelock->lv[dilinelock->index];
783 lv->offset = (dioffset + 2 * 128) >> L2INODESLOTSIZE; 783 lv->offset = (dioffset + 2 * 128) >> L2INODESLOTSIZE;
784 lv->length = 2; 784 lv->length = 2;
785 memcpy(&dp->di_fastsymlink, jfs_ip->i_inline, IDATASIZE); 785 memcpy(&dp->di_fastsymlink, jfs_ip->i_inline, IDATASIZE);
786 dilinelock->index++; 786 dilinelock->index++;
787 } 787 }
788 /* 788 /*
789 * copy inline data from in-memory inode to on-disk inode: 789 * copy inline data from in-memory inode to on-disk inode:
790 * 128 byte slot granularity 790 * 128 byte slot granularity
791 */ 791 */
792 if (test_cflag(COMMIT_Inlineea, ip)) { 792 if (test_cflag(COMMIT_Inlineea, ip)) {
793 lv = & dilinelock->lv[dilinelock->index]; 793 lv = & dilinelock->lv[dilinelock->index];
794 lv->offset = (dioffset + 3 * 128) >> L2INODESLOTSIZE; 794 lv->offset = (dioffset + 3 * 128) >> L2INODESLOTSIZE;
795 lv->length = 1; 795 lv->length = 1;
796 memcpy(&dp->di_inlineea, jfs_ip->i_inline_ea, INODESLOTSIZE); 796 memcpy(&dp->di_inlineea, jfs_ip->i_inline_ea, INODESLOTSIZE);
797 dilinelock->index++; 797 dilinelock->index++;
798 798
799 clear_cflag(COMMIT_Inlineea, ip); 799 clear_cflag(COMMIT_Inlineea, ip);
800 } 800 }
801 801
802 /* 802 /*
803 * lock/copy inode base: 128 byte slot granularity 803 * lock/copy inode base: 128 byte slot granularity
804 */ 804 */
805 lv = & dilinelock->lv[dilinelock->index]; 805 lv = & dilinelock->lv[dilinelock->index];
806 lv->offset = dioffset >> L2INODESLOTSIZE; 806 lv->offset = dioffset >> L2INODESLOTSIZE;
807 copy_to_dinode(dp, ip); 807 copy_to_dinode(dp, ip);
808 if (test_and_clear_cflag(COMMIT_Dirtable, ip)) { 808 if (test_and_clear_cflag(COMMIT_Dirtable, ip)) {
809 lv->length = 2; 809 lv->length = 2;
810 memcpy(&dp->di_dirtable, &jfs_ip->i_dirtable, 96); 810 memcpy(&dp->di_dirtable, &jfs_ip->i_dirtable, 96);
811 } else 811 } else
812 lv->length = 1; 812 lv->length = 1;
813 dilinelock->index++; 813 dilinelock->index++;
814 814
815 /* release the buffer holding the updated on-disk inode. 815 /* release the buffer holding the updated on-disk inode.
816 * the buffer will be later written by commit processing. 816 * the buffer will be later written by commit processing.
817 */ 817 */
818 write_metapage(mp); 818 write_metapage(mp);
819 819
820 return (rc); 820 return (rc);
821 } 821 }
822 822
823 823
/*
 * NAME:	diFree(ip)
 *
 * FUNCTION:	free a specified inode from the inode working map
 *		for a fileset or aggregate.
 *
 *		if the inode to be freed represents the first (only)
 *		free inode within the iag, the iag will be placed on
 *		the ag free inode list.
 *
 *		freeing the inode will cause the inode extent to be
 *		freed if the inode is the only allocated inode within
 *		the extent.  in this case all the disk resource backing
 *		up the inode extent will be freed.  in addition, the iag
 *		will be placed on the ag extent free list if the extent
 *		is the first free extent in the iag.  if freeing the
 *		extent also means that no free inodes will exist for
 *		the iag, the iag will also be removed from the ag free
 *		inode list.
 *
 *		the iag describing the inode will be freed if the extent
 *		is to be freed and it is the only backed extent within
 *		the iag.  in this case, the iag will be removed from the
 *		ag free extent list and ag free inode list and placed on
 *		the inode map's free iag list.
 *
 *		a careful update approach is used to provide consistency
 *		in the face of updates to multiple buffers.  under this
 *		approach, all required buffers are obtained before making
 *		any updates and are held until all updates are complete.
 *
 * PARAMETERS:
 *	ip	- inode to be freed.
 *
 * RETURN VALUES:
 *	0	- success
 *	-EIO	- i/o error.
 */
862 int diFree(struct inode *ip) 862 int diFree(struct inode *ip)
863 { 863 {
864 int rc; 864 int rc;
865 ino_t inum = ip->i_ino; 865 ino_t inum = ip->i_ino;
866 struct iag *iagp, *aiagp, *biagp, *ciagp, *diagp; 866 struct iag *iagp, *aiagp, *biagp, *ciagp, *diagp;
867 struct metapage *mp, *amp, *bmp, *cmp, *dmp; 867 struct metapage *mp, *amp, *bmp, *cmp, *dmp;
868 int iagno, ino, extno, bitno, sword, agno; 868 int iagno, ino, extno, bitno, sword, agno;
869 int back, fwd; 869 int back, fwd;
870 u32 bitmap, mask; 870 u32 bitmap, mask;
871 struct inode *ipimap = JFS_SBI(ip->i_sb)->ipimap; 871 struct inode *ipimap = JFS_SBI(ip->i_sb)->ipimap;
872 struct inomap *imap = JFS_IP(ipimap)->i_imap; 872 struct inomap *imap = JFS_IP(ipimap)->i_imap;
873 pxd_t freepxd; 873 pxd_t freepxd;
874 tid_t tid; 874 tid_t tid;
875 struct inode *iplist[3]; 875 struct inode *iplist[3];
876 struct tlock *tlck; 876 struct tlock *tlck;
877 struct pxd_lock *pxdlock; 877 struct pxd_lock *pxdlock;
878 878
879 /* 879 /*
880 * This is just to suppress compiler warnings. The same logic that 880 * This is just to suppress compiler warnings. The same logic that
881 * references these variables is used to initialize them. 881 * references these variables is used to initialize them.
882 */ 882 */
883 aiagp = biagp = ciagp = diagp = NULL; 883 aiagp = biagp = ciagp = diagp = NULL;
884 884
885 /* get the iag number containing the inode. 885 /* get the iag number containing the inode.
886 */ 886 */
887 iagno = INOTOIAG(inum); 887 iagno = INOTOIAG(inum);
888 888
889 /* make sure that the iag is contained within 889 /* make sure that the iag is contained within
890 * the map. 890 * the map.
891 */ 891 */
892 if (iagno >= imap->im_nextiag) { 892 if (iagno >= imap->im_nextiag) {
893 print_hex_dump(KERN_ERR, "imap: ", DUMP_PREFIX_ADDRESS, 16, 4, 893 print_hex_dump(KERN_ERR, "imap: ", DUMP_PREFIX_ADDRESS, 16, 4,
894 imap, 32, 0); 894 imap, 32, 0);
895 jfs_error(ip->i_sb, 895 jfs_error(ip->i_sb,
896 "diFree: inum = %d, iagno = %d, nextiag = %d", 896 "diFree: inum = %d, iagno = %d, nextiag = %d",
897 (uint) inum, iagno, imap->im_nextiag); 897 (uint) inum, iagno, imap->im_nextiag);
898 return -EIO; 898 return -EIO;
899 } 899 }
900 900
901 /* get the allocation group for this ino. 901 /* get the allocation group for this ino.
902 */ 902 */
903 agno = JFS_IP(ip)->agno; 903 agno = JFS_IP(ip)->agno;
904 904
905 /* Lock the AG specific inode map information 905 /* Lock the AG specific inode map information
906 */ 906 */
907 AG_LOCK(imap, agno); 907 AG_LOCK(imap, agno);
908 908
909 /* Obtain read lock in imap inode. Don't release it until we have 909 /* Obtain read lock in imap inode. Don't release it until we have
910 * read all of the IAG's that we are going to. 910 * read all of the IAG's that we are going to.
911 */ 911 */
912 IREAD_LOCK(ipimap, RDWRLOCK_IMAP); 912 IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
913 913
914 /* read the iag. 914 /* read the iag.
915 */ 915 */
916 if ((rc = diIAGRead(imap, iagno, &mp))) { 916 if ((rc = diIAGRead(imap, iagno, &mp))) {
917 IREAD_UNLOCK(ipimap); 917 IREAD_UNLOCK(ipimap);
918 AG_UNLOCK(imap, agno); 918 AG_UNLOCK(imap, agno);
919 return (rc); 919 return (rc);
920 } 920 }
921 iagp = (struct iag *) mp->data; 921 iagp = (struct iag *) mp->data;
922 922
923 /* get the inode number and extent number of the inode within 923 /* get the inode number and extent number of the inode within
924 * the iag and the inode number within the extent. 924 * the iag and the inode number within the extent.
925 */ 925 */
926 ino = inum & (INOSPERIAG - 1); 926 ino = inum & (INOSPERIAG - 1);
927 extno = ino >> L2INOSPEREXT; 927 extno = ino >> L2INOSPEREXT;
928 bitno = ino & (INOSPEREXT - 1); 928 bitno = ino & (INOSPEREXT - 1);
929 mask = HIGHORDER >> bitno; 929 mask = HIGHORDER >> bitno;
930 930
931 if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { 931 if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) {
932 jfs_error(ip->i_sb, 932 jfs_error(ip->i_sb,
933 "diFree: wmap shows inode already free"); 933 "diFree: wmap shows inode already free");
934 } 934 }
935 935
936 if (!addressPXD(&iagp->inoext[extno])) { 936 if (!addressPXD(&iagp->inoext[extno])) {
937 release_metapage(mp); 937 release_metapage(mp);
938 IREAD_UNLOCK(ipimap); 938 IREAD_UNLOCK(ipimap);
939 AG_UNLOCK(imap, agno); 939 AG_UNLOCK(imap, agno);
940 jfs_error(ip->i_sb, "diFree: invalid inoext"); 940 jfs_error(ip->i_sb, "diFree: invalid inoext");
941 return -EIO; 941 return -EIO;
942 } 942 }
943 943
944 /* compute the bitmap for the extent reflecting the freed inode. 944 /* compute the bitmap for the extent reflecting the freed inode.
945 */ 945 */
946 bitmap = le32_to_cpu(iagp->wmap[extno]) & ~mask; 946 bitmap = le32_to_cpu(iagp->wmap[extno]) & ~mask;
947 947
948 if (imap->im_agctl[agno].numfree > imap->im_agctl[agno].numinos) { 948 if (imap->im_agctl[agno].numfree > imap->im_agctl[agno].numinos) {
949 release_metapage(mp); 949 release_metapage(mp);
950 IREAD_UNLOCK(ipimap); 950 IREAD_UNLOCK(ipimap);
951 AG_UNLOCK(imap, agno); 951 AG_UNLOCK(imap, agno);
952 jfs_error(ip->i_sb, "diFree: numfree > numinos"); 952 jfs_error(ip->i_sb, "diFree: numfree > numinos");
953 return -EIO; 953 return -EIO;
954 } 954 }
955 /* 955 /*
956 * inode extent still has some inodes or below low water mark: 956 * inode extent still has some inodes or below low water mark:
957 * keep the inode extent; 957 * keep the inode extent;
958 */ 958 */
959 if (bitmap || 959 if (bitmap ||
960 imap->im_agctl[agno].numfree < 96 || 960 imap->im_agctl[agno].numfree < 96 ||
961 (imap->im_agctl[agno].numfree < 288 && 961 (imap->im_agctl[agno].numfree < 288 &&
962 (((imap->im_agctl[agno].numfree * 100) / 962 (((imap->im_agctl[agno].numfree * 100) /
963 imap->im_agctl[agno].numinos) <= 25))) { 963 imap->im_agctl[agno].numinos) <= 25))) {
964 /* if the iag currently has no free inodes (i.e., 964 /* if the iag currently has no free inodes (i.e.,
965 * the inode being freed is the first free inode of iag), 965 * the inode being freed is the first free inode of iag),
966 * insert the iag at head of the inode free list for the ag. 966 * insert the iag at head of the inode free list for the ag.
967 */ 967 */
968 if (iagp->nfreeinos == 0) { 968 if (iagp->nfreeinos == 0) {
969 /* check if there are any iags on the ag inode 969 /* check if there are any iags on the ag inode
970 * free list. if so, read the first one so that 970 * free list. if so, read the first one so that
971 * we can link the current iag onto the list at 971 * we can link the current iag onto the list at
972 * the head. 972 * the head.
973 */ 973 */
974 if ((fwd = imap->im_agctl[agno].inofree) >= 0) { 974 if ((fwd = imap->im_agctl[agno].inofree) >= 0) {
975 /* read the iag that currently is the head 975 /* read the iag that currently is the head
976 * of the list. 976 * of the list.
977 */ 977 */
978 if ((rc = diIAGRead(imap, fwd, &amp))) { 978 if ((rc = diIAGRead(imap, fwd, &amp))) {
979 IREAD_UNLOCK(ipimap); 979 IREAD_UNLOCK(ipimap);
980 AG_UNLOCK(imap, agno); 980 AG_UNLOCK(imap, agno);
981 release_metapage(mp); 981 release_metapage(mp);
982 return (rc); 982 return (rc);
983 } 983 }
984 aiagp = (struct iag *) amp->data; 984 aiagp = (struct iag *) amp->data;
985 985
986 /* make current head point back to the iag. 986 /* make current head point back to the iag.
987 */ 987 */
988 aiagp->inofreeback = cpu_to_le32(iagno); 988 aiagp->inofreeback = cpu_to_le32(iagno);
989 989
990 write_metapage(amp); 990 write_metapage(amp);
991 } 991 }
992 992
993 /* iag points forward to current head and iag 993 /* iag points forward to current head and iag
994 * becomes the new head of the list. 994 * becomes the new head of the list.
995 */ 995 */
996 iagp->inofreefwd = 996 iagp->inofreefwd =
997 cpu_to_le32(imap->im_agctl[agno].inofree); 997 cpu_to_le32(imap->im_agctl[agno].inofree);
998 iagp->inofreeback = cpu_to_le32(-1); 998 iagp->inofreeback = cpu_to_le32(-1);
999 imap->im_agctl[agno].inofree = iagno; 999 imap->im_agctl[agno].inofree = iagno;
1000 } 1000 }
1001 IREAD_UNLOCK(ipimap); 1001 IREAD_UNLOCK(ipimap);
1002 1002
1003 /* update the free inode summary map for the extent if 1003 /* update the free inode summary map for the extent if
1004 * freeing the inode means the extent will now have free 1004 * freeing the inode means the extent will now have free
1005 * inodes (i.e., the inode being freed is the first free 1005 * inodes (i.e., the inode being freed is the first free
1006 * inode of extent), 1006 * inode of extent),
1007 */ 1007 */
1008 if (iagp->wmap[extno] == cpu_to_le32(ONES)) { 1008 if (iagp->wmap[extno] == cpu_to_le32(ONES)) {
1009 sword = extno >> L2EXTSPERSUM; 1009 sword = extno >> L2EXTSPERSUM;
1010 bitno = extno & (EXTSPERSUM - 1); 1010 bitno = extno & (EXTSPERSUM - 1);
1011 iagp->inosmap[sword] &= 1011 iagp->inosmap[sword] &=
1012 cpu_to_le32(~(HIGHORDER >> bitno)); 1012 cpu_to_le32(~(HIGHORDER >> bitno));
1013 } 1013 }
1014 1014
1015 /* update the bitmap. 1015 /* update the bitmap.
1016 */ 1016 */
1017 iagp->wmap[extno] = cpu_to_le32(bitmap); 1017 iagp->wmap[extno] = cpu_to_le32(bitmap);
1018 1018
1019 /* update the free inode counts at the iag, ag and 1019 /* update the free inode counts at the iag, ag and
1020 * map level. 1020 * map level.
1021 */ 1021 */
1022 iagp->nfreeinos = 1022 le32_add_cpu(&iagp->nfreeinos, 1);
1023 cpu_to_le32(le32_to_cpu(iagp->nfreeinos) + 1);
1024 imap->im_agctl[agno].numfree += 1; 1023 imap->im_agctl[agno].numfree += 1;
1025 atomic_inc(&imap->im_numfree); 1024 atomic_inc(&imap->im_numfree);
1026 1025
1027 /* release the AG inode map lock 1026 /* release the AG inode map lock
1028 */ 1027 */
1029 AG_UNLOCK(imap, agno); 1028 AG_UNLOCK(imap, agno);
1030 1029
1031 /* write the iag */ 1030 /* write the iag */
1032 write_metapage(mp); 1031 write_metapage(mp);
1033 1032
1034 return (0); 1033 return (0);
1035 } 1034 }
1036 1035
1037 1036
1038 /* 1037 /*
1039 * inode extent has become free and above low water mark: 1038 * inode extent has become free and above low water mark:
1040 * free the inode extent; 1039 * free the inode extent;
1041 */ 1040 */
1042 1041
1043 /* 1042 /*
1044 * prepare to update iag list(s) (careful update step 1) 1043 * prepare to update iag list(s) (careful update step 1)
1045 */ 1044 */
1046 amp = bmp = cmp = dmp = NULL; 1045 amp = bmp = cmp = dmp = NULL;
1047 fwd = back = -1; 1046 fwd = back = -1;
1048 1047
1049 /* check if the iag currently has no free extents. if so, 1048 /* check if the iag currently has no free extents. if so,
1050 * it will be placed on the head of the ag extent free list. 1049 * it will be placed on the head of the ag extent free list.
1051 */ 1050 */
1052 if (iagp->nfreeexts == 0) { 1051 if (iagp->nfreeexts == 0) {
1053 /* check if the ag extent free list has any iags. 1052 /* check if the ag extent free list has any iags.
1054 * if so, read the iag at the head of the list now. 1053 * if so, read the iag at the head of the list now.
1055 * this (head) iag will be updated later to reflect 1054 * this (head) iag will be updated later to reflect
1056 * the addition of the current iag at the head of 1055 * the addition of the current iag at the head of
1057 * the list. 1056 * the list.
1058 */ 1057 */
1059 if ((fwd = imap->im_agctl[agno].extfree) >= 0) { 1058 if ((fwd = imap->im_agctl[agno].extfree) >= 0) {
1060 if ((rc = diIAGRead(imap, fwd, &amp))) 1059 if ((rc = diIAGRead(imap, fwd, &amp)))
1061 goto error_out; 1060 goto error_out;
1062 aiagp = (struct iag *) amp->data; 1061 aiagp = (struct iag *) amp->data;
1063 } 1062 }
1064 } else { 1063 } else {
1065 /* iag has free extents. check if the addition of a free 1064 /* iag has free extents. check if the addition of a free
1066 * extent will cause all extents to be free within this 1065 * extent will cause all extents to be free within this
1067 * iag. if so, the iag will be removed from the ag extent 1066 * iag. if so, the iag will be removed from the ag extent
1068 * free list and placed on the inode map's free iag list. 1067 * free list and placed on the inode map's free iag list.
1069 */ 1068 */
1070 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) { 1069 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) {
1071 /* in preparation for removing the iag from the 1070 /* in preparation for removing the iag from the
1072 * ag extent free list, read the iags preceeding 1071 * ag extent free list, read the iags preceeding
1073 * and following the iag on the ag extent free 1072 * and following the iag on the ag extent free
1074 * list. 1073 * list.
1075 */ 1074 */
1076 if ((fwd = le32_to_cpu(iagp->extfreefwd)) >= 0) { 1075 if ((fwd = le32_to_cpu(iagp->extfreefwd)) >= 0) {
1077 if ((rc = diIAGRead(imap, fwd, &amp))) 1076 if ((rc = diIAGRead(imap, fwd, &amp)))
1078 goto error_out; 1077 goto error_out;
1079 aiagp = (struct iag *) amp->data; 1078 aiagp = (struct iag *) amp->data;
1080 } 1079 }
1081 1080
1082 if ((back = le32_to_cpu(iagp->extfreeback)) >= 0) { 1081 if ((back = le32_to_cpu(iagp->extfreeback)) >= 0) {
1083 if ((rc = diIAGRead(imap, back, &bmp))) 1082 if ((rc = diIAGRead(imap, back, &bmp)))
1084 goto error_out; 1083 goto error_out;
1085 biagp = (struct iag *) bmp->data; 1084 biagp = (struct iag *) bmp->data;
1086 } 1085 }
1087 } 1086 }
1088 } 1087 }
1089 1088
1090 /* remove the iag from the ag inode free list if freeing 1089 /* remove the iag from the ag inode free list if freeing
1091 * this extent cause the iag to have no free inodes. 1090 * this extent cause the iag to have no free inodes.
1092 */ 1091 */
1093 if (iagp->nfreeinos == cpu_to_le32(INOSPEREXT - 1)) { 1092 if (iagp->nfreeinos == cpu_to_le32(INOSPEREXT - 1)) {
1094 int inofreeback = le32_to_cpu(iagp->inofreeback); 1093 int inofreeback = le32_to_cpu(iagp->inofreeback);
1095 int inofreefwd = le32_to_cpu(iagp->inofreefwd); 1094 int inofreefwd = le32_to_cpu(iagp->inofreefwd);
1096 1095
1097 /* in preparation for removing the iag from the 1096 /* in preparation for removing the iag from the
1098 * ag inode free list, read the iags preceeding 1097 * ag inode free list, read the iags preceeding
1099 * and following the iag on the ag inode free 1098 * and following the iag on the ag inode free
1100 * list. before reading these iags, we must make 1099 * list. before reading these iags, we must make
1101 * sure that we already don't have them in hand 1100 * sure that we already don't have them in hand
1102 * from up above, since re-reading an iag (buffer) 1101 * from up above, since re-reading an iag (buffer)
1103 * we are currently holding would cause a deadlock. 1102 * we are currently holding would cause a deadlock.
1104 */ 1103 */
1105 if (inofreefwd >= 0) { 1104 if (inofreefwd >= 0) {
1106 1105
1107 if (inofreefwd == fwd) 1106 if (inofreefwd == fwd)
1108 ciagp = (struct iag *) amp->data; 1107 ciagp = (struct iag *) amp->data;
1109 else if (inofreefwd == back) 1108 else if (inofreefwd == back)
1110 ciagp = (struct iag *) bmp->data; 1109 ciagp = (struct iag *) bmp->data;
1111 else { 1110 else {
1112 if ((rc = 1111 if ((rc =
1113 diIAGRead(imap, inofreefwd, &cmp))) 1112 diIAGRead(imap, inofreefwd, &cmp)))
1114 goto error_out; 1113 goto error_out;
1115 ciagp = (struct iag *) cmp->data; 1114 ciagp = (struct iag *) cmp->data;
1116 } 1115 }
1117 assert(ciagp != NULL); 1116 assert(ciagp != NULL);
1118 } 1117 }
1119 1118
1120 if (inofreeback >= 0) { 1119 if (inofreeback >= 0) {
1121 if (inofreeback == fwd) 1120 if (inofreeback == fwd)
1122 diagp = (struct iag *) amp->data; 1121 diagp = (struct iag *) amp->data;
1123 else if (inofreeback == back) 1122 else if (inofreeback == back)
1124 diagp = (struct iag *) bmp->data; 1123 diagp = (struct iag *) bmp->data;
1125 else { 1124 else {
1126 if ((rc = 1125 if ((rc =
1127 diIAGRead(imap, inofreeback, &dmp))) 1126 diIAGRead(imap, inofreeback, &dmp)))
1128 goto error_out; 1127 goto error_out;
1129 diagp = (struct iag *) dmp->data; 1128 diagp = (struct iag *) dmp->data;
1130 } 1129 }
1131 assert(diagp != NULL); 1130 assert(diagp != NULL);
1132 } 1131 }
1133 } 1132 }
1134 1133
1135 IREAD_UNLOCK(ipimap); 1134 IREAD_UNLOCK(ipimap);
1136 1135
1137 /* 1136 /*
1138 * invalidate any page of the inode extent freed from buffer cache; 1137 * invalidate any page of the inode extent freed from buffer cache;
1139 */ 1138 */
1140 freepxd = iagp->inoext[extno]; 1139 freepxd = iagp->inoext[extno];
1141 invalidate_pxd_metapages(ip, freepxd); 1140 invalidate_pxd_metapages(ip, freepxd);
1142 1141
1143 /* 1142 /*
1144 * update iag list(s) (careful update step 2) 1143 * update iag list(s) (careful update step 2)
1145 */ 1144 */
1146 /* add the iag to the ag extent free list if this is the 1145 /* add the iag to the ag extent free list if this is the
1147 * first free extent for the iag. 1146 * first free extent for the iag.
1148 */ 1147 */
1149 if (iagp->nfreeexts == 0) { 1148 if (iagp->nfreeexts == 0) {
1150 if (fwd >= 0) 1149 if (fwd >= 0)
1151 aiagp->extfreeback = cpu_to_le32(iagno); 1150 aiagp->extfreeback = cpu_to_le32(iagno);
1152 1151
1153 iagp->extfreefwd = 1152 iagp->extfreefwd =
1154 cpu_to_le32(imap->im_agctl[agno].extfree); 1153 cpu_to_le32(imap->im_agctl[agno].extfree);
1155 iagp->extfreeback = cpu_to_le32(-1); 1154 iagp->extfreeback = cpu_to_le32(-1);
1156 imap->im_agctl[agno].extfree = iagno; 1155 imap->im_agctl[agno].extfree = iagno;
1157 } else { 1156 } else {
1158 /* remove the iag from the ag extent list if all extents 1157 /* remove the iag from the ag extent list if all extents
1159 * are now free and place it on the inode map iag free list. 1158 * are now free and place it on the inode map iag free list.
1160 */ 1159 */
1161 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) { 1160 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) {
1162 if (fwd >= 0) 1161 if (fwd >= 0)
1163 aiagp->extfreeback = iagp->extfreeback; 1162 aiagp->extfreeback = iagp->extfreeback;
1164 1163
1165 if (back >= 0) 1164 if (back >= 0)
1166 biagp->extfreefwd = iagp->extfreefwd; 1165 biagp->extfreefwd = iagp->extfreefwd;
1167 else 1166 else
1168 imap->im_agctl[agno].extfree = 1167 imap->im_agctl[agno].extfree =
1169 le32_to_cpu(iagp->extfreefwd); 1168 le32_to_cpu(iagp->extfreefwd);
1170 1169
1171 iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1); 1170 iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1);
1172 1171
1173 IAGFREE_LOCK(imap); 1172 IAGFREE_LOCK(imap);
1174 iagp->iagfree = cpu_to_le32(imap->im_freeiag); 1173 iagp->iagfree = cpu_to_le32(imap->im_freeiag);
1175 imap->im_freeiag = iagno; 1174 imap->im_freeiag = iagno;
1176 IAGFREE_UNLOCK(imap); 1175 IAGFREE_UNLOCK(imap);
1177 } 1176 }
1178 } 1177 }
1179 1178
1180 /* remove the iag from the ag inode free list if freeing 1179 /* remove the iag from the ag inode free list if freeing
1181 * this extent causes the iag to have no free inodes. 1180 * this extent causes the iag to have no free inodes.
1182 */ 1181 */
1183 if (iagp->nfreeinos == cpu_to_le32(INOSPEREXT - 1)) { 1182 if (iagp->nfreeinos == cpu_to_le32(INOSPEREXT - 1)) {
1184 if ((int) le32_to_cpu(iagp->inofreefwd) >= 0) 1183 if ((int) le32_to_cpu(iagp->inofreefwd) >= 0)
1185 ciagp->inofreeback = iagp->inofreeback; 1184 ciagp->inofreeback = iagp->inofreeback;
1186 1185
1187 if ((int) le32_to_cpu(iagp->inofreeback) >= 0) 1186 if ((int) le32_to_cpu(iagp->inofreeback) >= 0)
1188 diagp->inofreefwd = iagp->inofreefwd; 1187 diagp->inofreefwd = iagp->inofreefwd;
1189 else 1188 else
1190 imap->im_agctl[agno].inofree = 1189 imap->im_agctl[agno].inofree =
1191 le32_to_cpu(iagp->inofreefwd); 1190 le32_to_cpu(iagp->inofreefwd);
1192 1191
1193 iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1); 1192 iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1);
1194 } 1193 }
1195 1194
1196 /* update the inode extent address and working map 1195 /* update the inode extent address and working map
1197 * to reflect the free extent. 1196 * to reflect the free extent.
1198 * the permanent map should have been updated already 1197 * the permanent map should have been updated already
1199 * for the inode being freed. 1198 * for the inode being freed.
1200 */ 1199 */
1201 if (iagp->pmap[extno] != 0) { 1200 if (iagp->pmap[extno] != 0) {
1202 jfs_error(ip->i_sb, "diFree: the pmap does not show inode free"); 1201 jfs_error(ip->i_sb, "diFree: the pmap does not show inode free");
1203 } 1202 }
1204 iagp->wmap[extno] = 0; 1203 iagp->wmap[extno] = 0;
1205 PXDlength(&iagp->inoext[extno], 0); 1204 PXDlength(&iagp->inoext[extno], 0);
1206 PXDaddress(&iagp->inoext[extno], 0); 1205 PXDaddress(&iagp->inoext[extno], 0);
1207 1206
1208 /* update the free extent and free inode summary maps 1207 /* update the free extent and free inode summary maps
1209 * to reflect the freed extent. 1208 * to reflect the freed extent.
1210 * the inode summary map is marked to indicate no inodes 1209 * the inode summary map is marked to indicate no inodes
1211 * available for the freed extent. 1210 * available for the freed extent.
1212 */ 1211 */
1213 sword = extno >> L2EXTSPERSUM; 1212 sword = extno >> L2EXTSPERSUM;
1214 bitno = extno & (EXTSPERSUM - 1); 1213 bitno = extno & (EXTSPERSUM - 1);
1215 mask = HIGHORDER >> bitno; 1214 mask = HIGHORDER >> bitno;
1216 iagp->inosmap[sword] |= cpu_to_le32(mask); 1215 iagp->inosmap[sword] |= cpu_to_le32(mask);
1217 iagp->extsmap[sword] &= cpu_to_le32(~mask); 1216 iagp->extsmap[sword] &= cpu_to_le32(~mask);
1218 1217
1219 /* update the number of free inodes and number of free extents 1218 /* update the number of free inodes and number of free extents
1220 * for the iag. 1219 * for the iag.
1221 */ 1220 */
1222 iagp->nfreeinos = cpu_to_le32(le32_to_cpu(iagp->nfreeinos) - 1221 le32_add_cpu(&iagp->nfreeinos, -(INOSPEREXT - 1));
1223 (INOSPEREXT - 1)); 1222 le32_add_cpu(&iagp->nfreeexts, 1);
1224 iagp->nfreeexts = cpu_to_le32(le32_to_cpu(iagp->nfreeexts) + 1);
1225 1223
1226 /* update the number of free inodes and backed inodes 1224 /* update the number of free inodes and backed inodes
1227 * at the ag and inode map level. 1225 * at the ag and inode map level.
1228 */ 1226 */
1229 imap->im_agctl[agno].numfree -= (INOSPEREXT - 1); 1227 imap->im_agctl[agno].numfree -= (INOSPEREXT - 1);
1230 imap->im_agctl[agno].numinos -= INOSPEREXT; 1228 imap->im_agctl[agno].numinos -= INOSPEREXT;
1231 atomic_sub(INOSPEREXT - 1, &imap->im_numfree); 1229 atomic_sub(INOSPEREXT - 1, &imap->im_numfree);
1232 atomic_sub(INOSPEREXT, &imap->im_numinos); 1230 atomic_sub(INOSPEREXT, &imap->im_numinos);
1233 1231
1234 if (amp) 1232 if (amp)
1235 write_metapage(amp); 1233 write_metapage(amp);
1236 if (bmp) 1234 if (bmp)
1237 write_metapage(bmp); 1235 write_metapage(bmp);
1238 if (cmp) 1236 if (cmp)
1239 write_metapage(cmp); 1237 write_metapage(cmp);
1240 if (dmp) 1238 if (dmp)
1241 write_metapage(dmp); 1239 write_metapage(dmp);
1242 1240
1243 /* 1241 /*
1244 * start transaction to update block allocation map 1242 * start transaction to update block allocation map
1245 * for the inode extent freed; 1243 * for the inode extent freed;
1246 * 1244 *
1247 * N.B. AG_LOCK is released and iag will be released below, and 1245 * N.B. AG_LOCK is released and iag will be released below, and
1248 * other thread may allocate inode from/reusing the ixad freed 1246 * other thread may allocate inode from/reusing the ixad freed
1249 * BUT with new/different backing inode extent from the extent 1247 * BUT with new/different backing inode extent from the extent
1250 * to be freed by the transaction; 1248 * to be freed by the transaction;
1251 */ 1249 */
1252 tid = txBegin(ipimap->i_sb, COMMIT_FORCE); 1250 tid = txBegin(ipimap->i_sb, COMMIT_FORCE);
1253 mutex_lock(&JFS_IP(ipimap)->commit_mutex); 1251 mutex_lock(&JFS_IP(ipimap)->commit_mutex);
1254 1252
1255 /* acquire tlock of the iag page of the freed ixad 1253 /* acquire tlock of the iag page of the freed ixad
1256 * to force the page NOHOMEOK (even though no data is 1254 * to force the page NOHOMEOK (even though no data is
1257 * logged from the iag page) until NOREDOPAGE|FREEXTENT log 1255 * logged from the iag page) until NOREDOPAGE|FREEXTENT log
1258 * for the free of the extent is committed; 1256 * for the free of the extent is committed;
1259 * write FREEXTENT|NOREDOPAGE log record 1257 * write FREEXTENT|NOREDOPAGE log record
1260 * N.B. linelock is overlaid as freed extent descriptor; 1258 * N.B. linelock is overlaid as freed extent descriptor;
1261 */ 1259 */
1262 tlck = txLock(tid, ipimap, mp, tlckINODE | tlckFREE); 1260 tlck = txLock(tid, ipimap, mp, tlckINODE | tlckFREE);
1263 pxdlock = (struct pxd_lock *) & tlck->lock; 1261 pxdlock = (struct pxd_lock *) & tlck->lock;
1264 pxdlock->flag = mlckFREEPXD; 1262 pxdlock->flag = mlckFREEPXD;
1265 pxdlock->pxd = freepxd; 1263 pxdlock->pxd = freepxd;
1266 pxdlock->index = 1; 1264 pxdlock->index = 1;
1267 1265
1268 write_metapage(mp); 1266 write_metapage(mp);
1269 1267
1270 iplist[0] = ipimap; 1268 iplist[0] = ipimap;
1271 1269
1272 /* 1270 /*
1273 * logredo needs the IAG number and IAG extent index in order 1271 * logredo needs the IAG number and IAG extent index in order
1274 * to ensure that the IMap is consistent. The least disruptive 1272 * to ensure that the IMap is consistent. The least disruptive
1275 * way to pass these values through to the transaction manager 1273 * way to pass these values through to the transaction manager
1276 * is in the iplist array. 1274 * is in the iplist array.
1277 * 1275 *
1278 * It's not pretty, but it works. 1276 * It's not pretty, but it works.
1279 */ 1277 */
1280 iplist[1] = (struct inode *) (size_t)iagno; 1278 iplist[1] = (struct inode *) (size_t)iagno;
1281 iplist[2] = (struct inode *) (size_t)extno; 1279 iplist[2] = (struct inode *) (size_t)extno;
1282 1280
1283 rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE); 1281 rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE);
1284 1282
1285 txEnd(tid); 1283 txEnd(tid);
1286 mutex_unlock(&JFS_IP(ipimap)->commit_mutex); 1284 mutex_unlock(&JFS_IP(ipimap)->commit_mutex);
1287 1285
1288 /* unlock the AG inode map information */ 1286 /* unlock the AG inode map information */
1289 AG_UNLOCK(imap, agno); 1287 AG_UNLOCK(imap, agno);
1290 1288
1291 return (0); 1289 return (0);
1292 1290
1293 error_out: 1291 error_out:
1294 IREAD_UNLOCK(ipimap); 1292 IREAD_UNLOCK(ipimap);
1295 1293
1296 if (amp) 1294 if (amp)
1297 release_metapage(amp); 1295 release_metapage(amp);
1298 if (bmp) 1296 if (bmp)
1299 release_metapage(bmp); 1297 release_metapage(bmp);
1300 if (cmp) 1298 if (cmp)
1301 release_metapage(cmp); 1299 release_metapage(cmp);
1302 if (dmp) 1300 if (dmp)
1303 release_metapage(dmp); 1301 release_metapage(dmp);
1304 1302
1305 AG_UNLOCK(imap, agno); 1303 AG_UNLOCK(imap, agno);
1306 1304
1307 release_metapage(mp); 1305 release_metapage(mp);
1308 1306
1309 return (rc); 1307 return (rc);
1310 } 1308 }
1311 1309
1312 /* 1310 /*
1313 * There are several places in the diAlloc* routines where we initialize 1311 * There are several places in the diAlloc* routines where we initialize
1314 * the inode. 1312 * the inode.
1315 */ 1313 */
1316 static inline void 1314 static inline void
1317 diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp) 1315 diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp)
1318 { 1316 {
1319 struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); 1317 struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
1320 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 1318 struct jfs_inode_info *jfs_ip = JFS_IP(ip);
1321 1319
1322 ip->i_ino = (iagno << L2INOSPERIAG) + ino; 1320 ip->i_ino = (iagno << L2INOSPERIAG) + ino;
1323 jfs_ip->ixpxd = iagp->inoext[extno]; 1321 jfs_ip->ixpxd = iagp->inoext[extno];
1324 jfs_ip->agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi); 1322 jfs_ip->agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi);
1325 jfs_ip->active_ag = -1; 1323 jfs_ip->active_ag = -1;
1326 } 1324 }
1327 1325
1328 1326
1329 /* 1327 /*
1330 * NAME: diAlloc(pip,dir,ip) 1328 * NAME: diAlloc(pip,dir,ip)
1331 * 1329 *
1332 * FUNCTION: allocate a disk inode from the inode working map 1330 * FUNCTION: allocate a disk inode from the inode working map
1333 * for a fileset or aggregate. 1331 * for a fileset or aggregate.
1334 * 1332 *
1335 * PARAMETERS: 1333 * PARAMETERS:
1336 * pip - pointer to incore inode for the parent inode. 1334 * pip - pointer to incore inode for the parent inode.
1337 * dir - 'true' if the new disk inode is for a directory. 1335 * dir - 'true' if the new disk inode is for a directory.
1338 * ip - pointer to a new inode 1336 * ip - pointer to a new inode
1339 * 1337 *
1340 * RETURN VALUES: 1338 * RETURN VALUES:
1341 * 0 - success. 1339 * 0 - success.
1342 * -ENOSPC - insufficient disk resources. 1340 * -ENOSPC - insufficient disk resources.
1343 * -EIO - i/o error. 1341 * -EIO - i/o error.
1344 */ 1342 */
1345 int diAlloc(struct inode *pip, bool dir, struct inode *ip) 1343 int diAlloc(struct inode *pip, bool dir, struct inode *ip)
1346 { 1344 {
1347 int rc, ino, iagno, addext, extno, bitno, sword; 1345 int rc, ino, iagno, addext, extno, bitno, sword;
1348 int nwords, rem, i, agno; 1346 int nwords, rem, i, agno;
1349 u32 mask, inosmap, extsmap; 1347 u32 mask, inosmap, extsmap;
1350 struct inode *ipimap; 1348 struct inode *ipimap;
1351 struct metapage *mp; 1349 struct metapage *mp;
1352 ino_t inum; 1350 ino_t inum;
1353 struct iag *iagp; 1351 struct iag *iagp;
1354 struct inomap *imap; 1352 struct inomap *imap;
1355 1353
1356 /* get the pointers to the inode map inode and the 1354 /* get the pointers to the inode map inode and the
1357 * corresponding imap control structure. 1355 * corresponding imap control structure.
1358 */ 1356 */
1359 ipimap = JFS_SBI(pip->i_sb)->ipimap; 1357 ipimap = JFS_SBI(pip->i_sb)->ipimap;
1360 imap = JFS_IP(ipimap)->i_imap; 1358 imap = JFS_IP(ipimap)->i_imap;
1361 JFS_IP(ip)->ipimap = ipimap; 1359 JFS_IP(ip)->ipimap = ipimap;
1362 JFS_IP(ip)->fileset = FILESYSTEM_I; 1360 JFS_IP(ip)->fileset = FILESYSTEM_I;
1363 1361
1364 /* for a directory, the allocation policy is to start 1362 /* for a directory, the allocation policy is to start
1365 * at the ag level using the preferred ag. 1363 * at the ag level using the preferred ag.
1366 */ 1364 */
1367 if (dir) { 1365 if (dir) {
1368 agno = dbNextAG(JFS_SBI(pip->i_sb)->ipbmap); 1366 agno = dbNextAG(JFS_SBI(pip->i_sb)->ipbmap);
1369 AG_LOCK(imap, agno); 1367 AG_LOCK(imap, agno);
1370 goto tryag; 1368 goto tryag;
1371 } 1369 }
1372 1370
1373 /* for files, the policy starts off by trying to allocate from 1371 /* for files, the policy starts off by trying to allocate from
1374 * the same iag containing the parent disk inode: 1372 * the same iag containing the parent disk inode:
1375 * try to allocate the new disk inode close to the parent disk 1373 * try to allocate the new disk inode close to the parent disk
1376 * inode, using parent disk inode number + 1 as the allocation 1374 * inode, using parent disk inode number + 1 as the allocation
1377 * hint. (we use a left-to-right policy to attempt to avoid 1375 * hint. (we use a left-to-right policy to attempt to avoid
1378 * moving backward on the disk.) compute the hint within the 1376 * moving backward on the disk.) compute the hint within the
1379 * file system and the iag. 1377 * file system and the iag.
1380 */ 1378 */
1381 1379
1382 /* get the ag number of this iag */ 1380 /* get the ag number of this iag */
1383 agno = JFS_IP(pip)->agno; 1381 agno = JFS_IP(pip)->agno;
1384 1382
1385 if (atomic_read(&JFS_SBI(pip->i_sb)->bmap->db_active[agno])) { 1383 if (atomic_read(&JFS_SBI(pip->i_sb)->bmap->db_active[agno])) {
1386 /* 1384 /*
1387 * There is an open file actively growing. We want to 1385 * There is an open file actively growing. We want to
1388 * allocate new inodes from a different ag to avoid 1386 * allocate new inodes from a different ag to avoid
1389 * fragmentation problems. 1387 * fragmentation problems.
1390 */ 1388 */
1391 agno = dbNextAG(JFS_SBI(pip->i_sb)->ipbmap); 1389 agno = dbNextAG(JFS_SBI(pip->i_sb)->ipbmap);
1392 AG_LOCK(imap, agno); 1390 AG_LOCK(imap, agno);
1393 goto tryag; 1391 goto tryag;
1394 } 1392 }
1395 1393
1396 inum = pip->i_ino + 1; 1394 inum = pip->i_ino + 1;
1397 ino = inum & (INOSPERIAG - 1); 1395 ino = inum & (INOSPERIAG - 1);
1398 1396
1399 /* back off the hint if it is outside of the iag */ 1397 /* back off the hint if it is outside of the iag */
1400 if (ino == 0) 1398 if (ino == 0)
1401 inum = pip->i_ino; 1399 inum = pip->i_ino;
1402 1400
1403 /* lock the AG inode map information */ 1401 /* lock the AG inode map information */
1404 AG_LOCK(imap, agno); 1402 AG_LOCK(imap, agno);
1405 1403
1406 /* Get read lock on imap inode */ 1404 /* Get read lock on imap inode */
1407 IREAD_LOCK(ipimap, RDWRLOCK_IMAP); 1405 IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
1408 1406
1409 /* get the iag number and read the iag */ 1407 /* get the iag number and read the iag */
1410 iagno = INOTOIAG(inum); 1408 iagno = INOTOIAG(inum);
1411 if ((rc = diIAGRead(imap, iagno, &mp))) { 1409 if ((rc = diIAGRead(imap, iagno, &mp))) {
1412 IREAD_UNLOCK(ipimap); 1410 IREAD_UNLOCK(ipimap);
1413 AG_UNLOCK(imap, agno); 1411 AG_UNLOCK(imap, agno);
1414 return (rc); 1412 return (rc);
1415 } 1413 }
1416 iagp = (struct iag *) mp->data; 1414 iagp = (struct iag *) mp->data;
1417 1415
1418 /* determine if new inode extent is allowed to be added to the iag. 1416 /* determine if new inode extent is allowed to be added to the iag.
1419 * new inode extent can be added to the iag if the ag 1417 * new inode extent can be added to the iag if the ag
1420 * has less than 32 free disk inodes and the iag has free extents. 1418 * has less than 32 free disk inodes and the iag has free extents.
1421 */ 1419 */
1422 addext = (imap->im_agctl[agno].numfree < 32 && iagp->nfreeexts); 1420 addext = (imap->im_agctl[agno].numfree < 32 && iagp->nfreeexts);
1423 1421
1424 /* 1422 /*
1425 * try to allocate from the IAG 1423 * try to allocate from the IAG
1426 */ 1424 */
1427 /* check if the inode may be allocated from the iag 1425 /* check if the inode may be allocated from the iag
1428 * (i.e. the inode has free inodes or new extent can be added). 1426 * (i.e. the inode has free inodes or new extent can be added).
1429 */ 1427 */
1430 if (iagp->nfreeinos || addext) { 1428 if (iagp->nfreeinos || addext) {
1431 /* determine the extent number of the hint. 1429 /* determine the extent number of the hint.
1432 */ 1430 */
1433 extno = ino >> L2INOSPEREXT; 1431 extno = ino >> L2INOSPEREXT;
1434 1432
1435 /* check if the extent containing the hint has backed 1433 /* check if the extent containing the hint has backed
1436 * inodes. if so, try to allocate within this extent. 1434 * inodes. if so, try to allocate within this extent.
1437 */ 1435 */
1438 if (addressPXD(&iagp->inoext[extno])) { 1436 if (addressPXD(&iagp->inoext[extno])) {
1439 bitno = ino & (INOSPEREXT - 1); 1437 bitno = ino & (INOSPEREXT - 1);
1440 if ((bitno = 1438 if ((bitno =
1441 diFindFree(le32_to_cpu(iagp->wmap[extno]), 1439 diFindFree(le32_to_cpu(iagp->wmap[extno]),
1442 bitno)) 1440 bitno))
1443 < INOSPEREXT) { 1441 < INOSPEREXT) {
1444 ino = (extno << L2INOSPEREXT) + bitno; 1442 ino = (extno << L2INOSPEREXT) + bitno;
1445 1443
1446 /* a free inode (bit) was found within this 1444 /* a free inode (bit) was found within this
1447 * extent, so allocate it. 1445 * extent, so allocate it.
1448 */ 1446 */
1449 rc = diAllocBit(imap, iagp, ino); 1447 rc = diAllocBit(imap, iagp, ino);
1450 IREAD_UNLOCK(ipimap); 1448 IREAD_UNLOCK(ipimap);
1451 if (rc) { 1449 if (rc) {
1452 assert(rc == -EIO); 1450 assert(rc == -EIO);
1453 } else { 1451 } else {
1454 /* set the results of the allocation 1452 /* set the results of the allocation
1455 * and write the iag. 1453 * and write the iag.
1456 */ 1454 */
1457 diInitInode(ip, iagno, ino, extno, 1455 diInitInode(ip, iagno, ino, extno,
1458 iagp); 1456 iagp);
1459 mark_metapage_dirty(mp); 1457 mark_metapage_dirty(mp);
1460 } 1458 }
1461 release_metapage(mp); 1459 release_metapage(mp);
1462 1460
1463 /* free the AG lock and return. 1461 /* free the AG lock and return.
1464 */ 1462 */
1465 AG_UNLOCK(imap, agno); 1463 AG_UNLOCK(imap, agno);
1466 return (rc); 1464 return (rc);
1467 } 1465 }
1468 1466
1469 if (!addext) 1467 if (!addext)
1470 extno = 1468 extno =
1471 (extno == 1469 (extno ==
1472 EXTSPERIAG - 1) ? 0 : extno + 1; 1470 EXTSPERIAG - 1) ? 0 : extno + 1;
1473 } 1471 }
1474 1472
1475 /* 1473 /*
1476 * no free inodes within the extent containing the hint. 1474 * no free inodes within the extent containing the hint.
1477 * 1475 *
1478 * try to allocate from the backed extents following 1476 * try to allocate from the backed extents following
1479 * hint or, if appropriate (i.e. addext is true), allocate 1477 * hint or, if appropriate (i.e. addext is true), allocate
1480 * an extent of free inodes at or following the extent 1478 * an extent of free inodes at or following the extent
1481 * containing the hint. 1479 * containing the hint.
1482 * 1480 *
1483 * the free inode and free extent summary maps are used 1481 * the free inode and free extent summary maps are used
1484 * here, so determine the starting summary map position 1482 * here, so determine the starting summary map position
1485 * and the number of words we'll have to examine. again, 1483 * and the number of words we'll have to examine. again,
1486 * the approach is to allocate following the hint, so we 1484 * the approach is to allocate following the hint, so we
1487 * might have to initially ignore prior bits of the summary 1485 * might have to initially ignore prior bits of the summary
1488 * map that represent extents prior to the extent containing 1486 * map that represent extents prior to the extent containing
1489 * the hint and later revisit these bits. 1487 * the hint and later revisit these bits.
1490 */ 1488 */
1491 bitno = extno & (EXTSPERSUM - 1); 1489 bitno = extno & (EXTSPERSUM - 1);
1492 nwords = (bitno == 0) ? SMAPSZ : SMAPSZ + 1; 1490 nwords = (bitno == 0) ? SMAPSZ : SMAPSZ + 1;
1493 sword = extno >> L2EXTSPERSUM; 1491 sword = extno >> L2EXTSPERSUM;
1494 1492
1495 /* mask any prior bits for the starting words of the 1493 /* mask any prior bits for the starting words of the
1496 * summary map. 1494 * summary map.
1497 */ 1495 */
1498 mask = ONES << (EXTSPERSUM - bitno); 1496 mask = ONES << (EXTSPERSUM - bitno);
1499 inosmap = le32_to_cpu(iagp->inosmap[sword]) | mask; 1497 inosmap = le32_to_cpu(iagp->inosmap[sword]) | mask;
1500 extsmap = le32_to_cpu(iagp->extsmap[sword]) | mask; 1498 extsmap = le32_to_cpu(iagp->extsmap[sword]) | mask;
1501 1499
1502 /* scan the free inode and free extent summary maps for 1500 /* scan the free inode and free extent summary maps for
1503 * free resources. 1501 * free resources.
1504 */ 1502 */
1505 for (i = 0; i < nwords; i++) { 1503 for (i = 0; i < nwords; i++) {
1506 /* check if this word of the free inode summary 1504 /* check if this word of the free inode summary
1507 * map describes an extent with free inodes. 1505 * map describes an extent with free inodes.
1508 */ 1506 */
1509 if (~inosmap) { 1507 if (~inosmap) {
1510 /* an extent with free inodes has been 1508 /* an extent with free inodes has been
1511 * found. determine the extent number 1509 * found. determine the extent number
1512 * and the inode number within the extent. 1510 * and the inode number within the extent.
1513 */ 1511 */
1514 rem = diFindFree(inosmap, 0); 1512 rem = diFindFree(inosmap, 0);
1515 extno = (sword << L2EXTSPERSUM) + rem; 1513 extno = (sword << L2EXTSPERSUM) + rem;
1516 rem = diFindFree(le32_to_cpu(iagp->wmap[extno]), 1514 rem = diFindFree(le32_to_cpu(iagp->wmap[extno]),
1517 0); 1515 0);
1518 if (rem >= INOSPEREXT) { 1516 if (rem >= INOSPEREXT) {
1519 IREAD_UNLOCK(ipimap); 1517 IREAD_UNLOCK(ipimap);
1520 release_metapage(mp); 1518 release_metapage(mp);
1521 AG_UNLOCK(imap, agno); 1519 AG_UNLOCK(imap, agno);
1522 jfs_error(ip->i_sb, 1520 jfs_error(ip->i_sb,
1523 "diAlloc: can't find free bit " 1521 "diAlloc: can't find free bit "
1524 "in wmap"); 1522 "in wmap");
1525 return EIO; 1523 return EIO;
1526 } 1524 }
1527 1525
1528 /* determine the inode number within the 1526 /* determine the inode number within the
1529 * iag and allocate the inode from the 1527 * iag and allocate the inode from the
1530 * map. 1528 * map.
1531 */ 1529 */
1532 ino = (extno << L2INOSPEREXT) + rem; 1530 ino = (extno << L2INOSPEREXT) + rem;
1533 rc = diAllocBit(imap, iagp, ino); 1531 rc = diAllocBit(imap, iagp, ino);
1534 IREAD_UNLOCK(ipimap); 1532 IREAD_UNLOCK(ipimap);
1535 if (rc) 1533 if (rc)
1536 assert(rc == -EIO); 1534 assert(rc == -EIO);
1537 else { 1535 else {
1538 /* set the results of the allocation 1536 /* set the results of the allocation
1539 * and write the iag. 1537 * and write the iag.
1540 */ 1538 */
1541 diInitInode(ip, iagno, ino, extno, 1539 diInitInode(ip, iagno, ino, extno,
1542 iagp); 1540 iagp);
1543 mark_metapage_dirty(mp); 1541 mark_metapage_dirty(mp);
1544 } 1542 }
1545 release_metapage(mp); 1543 release_metapage(mp);
1546 1544
1547 /* free the AG lock and return. 1545 /* free the AG lock and return.
1548 */ 1546 */
1549 AG_UNLOCK(imap, agno); 1547 AG_UNLOCK(imap, agno);
1550 return (rc); 1548 return (rc);
1551 1549
1552 } 1550 }
1553 1551
1554 /* check if we may allocate an extent of free 1552 /* check if we may allocate an extent of free
1555 * inodes and whether this word of the free 1553 * inodes and whether this word of the free
1556 * extents summary map describes a free extent. 1554 * extents summary map describes a free extent.
1557 */ 1555 */
1558 if (addext && ~extsmap) { 1556 if (addext && ~extsmap) {
1559 /* a free extent has been found. determine 1557 /* a free extent has been found. determine
1560 * the extent number. 1558 * the extent number.
1561 */ 1559 */
1562 rem = diFindFree(extsmap, 0); 1560 rem = diFindFree(extsmap, 0);
1563 extno = (sword << L2EXTSPERSUM) + rem; 1561 extno = (sword << L2EXTSPERSUM) + rem;
1564 1562
1565 /* allocate an extent of free inodes. 1563 /* allocate an extent of free inodes.
1566 */ 1564 */
1567 if ((rc = diNewExt(imap, iagp, extno))) { 1565 if ((rc = diNewExt(imap, iagp, extno))) {
1568 /* if there is no disk space for a 1566 /* if there is no disk space for a
1569 * new extent, try to allocate the 1567 * new extent, try to allocate the
1570 * disk inode from somewhere else. 1568 * disk inode from somewhere else.
1571 */ 1569 */
1572 if (rc == -ENOSPC) 1570 if (rc == -ENOSPC)
1573 break; 1571 break;
1574 1572
1575 assert(rc == -EIO); 1573 assert(rc == -EIO);
1576 } else { 1574 } else {
1577 /* set the results of the allocation 1575 /* set the results of the allocation
1578 * and write the iag. 1576 * and write the iag.
1579 */ 1577 */
1580 diInitInode(ip, iagno, 1578 diInitInode(ip, iagno,
1581 extno << L2INOSPEREXT, 1579 extno << L2INOSPEREXT,
1582 extno, iagp); 1580 extno, iagp);
1583 mark_metapage_dirty(mp); 1581 mark_metapage_dirty(mp);
1584 } 1582 }
1585 release_metapage(mp); 1583 release_metapage(mp);
1586 /* free the imap inode & the AG lock & return. 1584 /* free the imap inode & the AG lock & return.
1587 */ 1585 */
1588 IREAD_UNLOCK(ipimap); 1586 IREAD_UNLOCK(ipimap);
1589 AG_UNLOCK(imap, agno); 1587 AG_UNLOCK(imap, agno);
1590 return (rc); 1588 return (rc);
1591 } 1589 }
1592 1590
1593 /* move on to the next set of summary map words. 1591 /* move on to the next set of summary map words.
1594 */ 1592 */
1595 sword = (sword == SMAPSZ - 1) ? 0 : sword + 1; 1593 sword = (sword == SMAPSZ - 1) ? 0 : sword + 1;
1596 inosmap = le32_to_cpu(iagp->inosmap[sword]); 1594 inosmap = le32_to_cpu(iagp->inosmap[sword]);
1597 extsmap = le32_to_cpu(iagp->extsmap[sword]); 1595 extsmap = le32_to_cpu(iagp->extsmap[sword]);
1598 } 1596 }
1599 } 1597 }
1600 /* unlock imap inode */ 1598 /* unlock imap inode */
1601 IREAD_UNLOCK(ipimap); 1599 IREAD_UNLOCK(ipimap);
1602 1600
1603 /* nothing doing in this iag, so release it. */ 1601 /* nothing doing in this iag, so release it. */
1604 release_metapage(mp); 1602 release_metapage(mp);
1605 1603
1606 tryag: 1604 tryag:
1607 /* 1605 /*
1608 * try to allocate anywhere within the same AG as the parent inode. 1606 * try to allocate anywhere within the same AG as the parent inode.
1609 */ 1607 */
1610 rc = diAllocAG(imap, agno, dir, ip); 1608 rc = diAllocAG(imap, agno, dir, ip);
1611 1609
1612 AG_UNLOCK(imap, agno); 1610 AG_UNLOCK(imap, agno);
1613 1611
1614 if (rc != -ENOSPC) 1612 if (rc != -ENOSPC)
1615 return (rc); 1613 return (rc);
1616 1614
1617 /* 1615 /*
1618 * try to allocate in any AG. 1616 * try to allocate in any AG.
1619 */ 1617 */
1620 return (diAllocAny(imap, agno, dir, ip)); 1618 return (diAllocAny(imap, agno, dir, ip));
1621 } 1619 }
1622 1620
1623 1621
1624 /* 1622 /*
1625 * NAME: diAllocAG(imap,agno,dir,ip) 1623 * NAME: diAllocAG(imap,agno,dir,ip)
1626 * 1624 *
1627 * FUNCTION: allocate a disk inode from the allocation group. 1625 * FUNCTION: allocate a disk inode from the allocation group.
1628 * 1626 *
1629 * this routine first determines if a new extent of free 1627 * this routine first determines if a new extent of free
1630 * inodes should be added for the allocation group, with 1628 * inodes should be added for the allocation group, with
1631 * the current request satisfied from this extent. if this 1629 * the current request satisfied from this extent. if this
1632 * is the case, an attempt will be made to do just that. if 1630 * is the case, an attempt will be made to do just that. if
1633 * this attempt fails or it has been determined that a new 1631 * this attempt fails or it has been determined that a new
1634 * extent should not be added, an attempt is made to satisfy 1632 * extent should not be added, an attempt is made to satisfy
1635 * the request by allocating an existing (backed) free inode 1633 * the request by allocating an existing (backed) free inode
1636 * from the allocation group. 1634 * from the allocation group.
1637 * 1635 *
1638 * PRE CONDITION: Already have the AG lock for this AG. 1636 * PRE CONDITION: Already have the AG lock for this AG.
1639 * 1637 *
1640 * PARAMETERS: 1638 * PARAMETERS:
1641 * imap - pointer to inode map control structure. 1639 * imap - pointer to inode map control structure.
1642 * agno - allocation group to allocate from. 1640 * agno - allocation group to allocate from.
1643 * dir - 'true' if the new disk inode is for a directory. 1641 * dir - 'true' if the new disk inode is for a directory.
1644 * ip - pointer to the new inode to be filled in on successful return 1642 * ip - pointer to the new inode to be filled in on successful return
1645 * with the disk inode number allocated, its extent address 1643 * with the disk inode number allocated, its extent address
1646 * and the start of the ag. 1644 * and the start of the ag.
1647 * 1645 *
1648 * RETURN VALUES: 1646 * RETURN VALUES:
1649 * 0 - success. 1647 * 0 - success.
1650 * -ENOSPC - insufficient disk resources. 1648 * -ENOSPC - insufficient disk resources.
1651 * -EIO - i/o error. 1649 * -EIO - i/o error.
1652 */ 1650 */
1653 static int 1651 static int
1654 diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip) 1652 diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip)
1655 { 1653 {
1656 int rc, addext, numfree, numinos; 1654 int rc, addext, numfree, numinos;
1657 1655
1658 /* get the number of free and the number of backed disk 1656 /* get the number of free and the number of backed disk
1659 * inodes currently within the ag. 1657 * inodes currently within the ag.
1660 */ 1658 */
1661 numfree = imap->im_agctl[agno].numfree; 1659 numfree = imap->im_agctl[agno].numfree;
1662 numinos = imap->im_agctl[agno].numinos; 1660 numinos = imap->im_agctl[agno].numinos;
1663 1661
1664 if (numfree > numinos) { 1662 if (numfree > numinos) {
1665 jfs_error(ip->i_sb, "diAllocAG: numfree > numinos"); 1663 jfs_error(ip->i_sb, "diAllocAG: numfree > numinos");
1666 return -EIO; 1664 return -EIO;
1667 } 1665 }
1668 1666
1669 /* determine if we should allocate a new extent of free inodes 1667 /* determine if we should allocate a new extent of free inodes
1670 * within the ag: for directory inodes, add a new extent 1668 * within the ag: for directory inodes, add a new extent
1671 * if there are a small number of free inodes or number of free 1669 * if there are a small number of free inodes or number of free
1672 * inodes is a small percentage of the number of backed inodes. 1670 * inodes is a small percentage of the number of backed inodes.
1673 */ 1671 */
1674 if (dir) 1672 if (dir)
1675 addext = (numfree < 64 || 1673 addext = (numfree < 64 ||
1676 (numfree < 256 1674 (numfree < 256
1677 && ((numfree * 100) / numinos) <= 20)); 1675 && ((numfree * 100) / numinos) <= 20));
1678 else 1676 else
1679 addext = (numfree == 0); 1677 addext = (numfree == 0);
1680 1678
1681 /* 1679 /*
1682 * try to allocate a new extent of free inodes. 1680 * try to allocate a new extent of free inodes.
1683 */ 1681 */
1684 if (addext) { 1682 if (addext) {
1685 /* if free space is not avaliable for this new extent, try 1683 /* if free space is not avaliable for this new extent, try
1686 * below to allocate a free and existing (already backed) 1684 * below to allocate a free and existing (already backed)
1687 * inode from the ag. 1685 * inode from the ag.
1688 */ 1686 */
1689 if ((rc = diAllocExt(imap, agno, ip)) != -ENOSPC) 1687 if ((rc = diAllocExt(imap, agno, ip)) != -ENOSPC)
1690 return (rc); 1688 return (rc);
1691 } 1689 }
1692 1690
1693 /* 1691 /*
1694 * try to allocate an existing free inode from the ag. 1692 * try to allocate an existing free inode from the ag.
1695 */ 1693 */
1696 return (diAllocIno(imap, agno, ip)); 1694 return (diAllocIno(imap, agno, ip));
1697 } 1695 }
1698 1696
1699 1697
1700 /* 1698 /*
1701 * NAME: diAllocAny(imap,agno,dir,iap) 1699 * NAME: diAllocAny(imap,agno,dir,iap)
1702 * 1700 *
1703 * FUNCTION: allocate a disk inode from any other allocation group. 1701 * FUNCTION: allocate a disk inode from any other allocation group.
1704 * 1702 *
1705 * this routine is called when an allocation attempt within 1703 * this routine is called when an allocation attempt within
1706 * the primary allocation group has failed. if attempts to 1704 * the primary allocation group has failed. if attempts to
1707 * allocate an inode from any allocation group other than the 1705 * allocate an inode from any allocation group other than the
1708 * specified primary group. 1706 * specified primary group.
1709 * 1707 *
1710 * PARAMETERS: 1708 * PARAMETERS:
1711 * imap - pointer to inode map control structure. 1709 * imap - pointer to inode map control structure.
1712 * agno - primary allocation group (to avoid). 1710 * agno - primary allocation group (to avoid).
1713 * dir - 'true' if the new disk inode is for a directory. 1711 * dir - 'true' if the new disk inode is for a directory.
1714 * ip - pointer to a new inode to be filled in on successful return 1712 * ip - pointer to a new inode to be filled in on successful return
1715 * with the disk inode number allocated, its extent address 1713 * with the disk inode number allocated, its extent address
1716 * and the start of the ag. 1714 * and the start of the ag.
1717 * 1715 *
1718 * RETURN VALUES: 1716 * RETURN VALUES:
1719 * 0 - success. 1717 * 0 - success.
1720 * -ENOSPC - insufficient disk resources. 1718 * -ENOSPC - insufficient disk resources.
1721 * -EIO - i/o error. 1719 * -EIO - i/o error.
1722 */ 1720 */
1723 static int 1721 static int
1724 diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip) 1722 diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip)
1725 { 1723 {
1726 int ag, rc; 1724 int ag, rc;
1727 int maxag = JFS_SBI(imap->im_ipimap->i_sb)->bmap->db_maxag; 1725 int maxag = JFS_SBI(imap->im_ipimap->i_sb)->bmap->db_maxag;
1728 1726
1729 1727
1730 /* try to allocate from the ags following agno up to 1728 /* try to allocate from the ags following agno up to
1731 * the maximum ag number. 1729 * the maximum ag number.
1732 */ 1730 */
1733 for (ag = agno + 1; ag <= maxag; ag++) { 1731 for (ag = agno + 1; ag <= maxag; ag++) {
1734 AG_LOCK(imap, ag); 1732 AG_LOCK(imap, ag);
1735 1733
1736 rc = diAllocAG(imap, ag, dir, ip); 1734 rc = diAllocAG(imap, ag, dir, ip);
1737 1735
1738 AG_UNLOCK(imap, ag); 1736 AG_UNLOCK(imap, ag);
1739 1737
1740 if (rc != -ENOSPC) 1738 if (rc != -ENOSPC)
1741 return (rc); 1739 return (rc);
1742 } 1740 }
1743 1741
1744 /* try to allocate from the ags in front of agno. 1742 /* try to allocate from the ags in front of agno.
1745 */ 1743 */
1746 for (ag = 0; ag < agno; ag++) { 1744 for (ag = 0; ag < agno; ag++) {
1747 AG_LOCK(imap, ag); 1745 AG_LOCK(imap, ag);
1748 1746
1749 rc = diAllocAG(imap, ag, dir, ip); 1747 rc = diAllocAG(imap, ag, dir, ip);
1750 1748
1751 AG_UNLOCK(imap, ag); 1749 AG_UNLOCK(imap, ag);
1752 1750
1753 if (rc != -ENOSPC) 1751 if (rc != -ENOSPC)
1754 return (rc); 1752 return (rc);
1755 } 1753 }
1756 1754
1757 /* no free disk inodes. 1755 /* no free disk inodes.
1758 */ 1756 */
1759 return -ENOSPC; 1757 return -ENOSPC;
1760 } 1758 }
1761 1759
1762 1760
1763 /* 1761 /*
1764 * NAME: diAllocIno(imap,agno,ip) 1762 * NAME: diAllocIno(imap,agno,ip)
1765 * 1763 *
1766 * FUNCTION: allocate a disk inode from the allocation group's free 1764 * FUNCTION: allocate a disk inode from the allocation group's free
1767 * inode list, returning an error if this free list is 1765 * inode list, returning an error if this free list is
1768 * empty (i.e. no iags on the list). 1766 * empty (i.e. no iags on the list).
1769 * 1767 *
1770 * allocation occurs from the first iag on the list using 1768 * allocation occurs from the first iag on the list using
1771 * the iag's free inode summary map to find the leftmost 1769 * the iag's free inode summary map to find the leftmost
1772 * free inode in the iag. 1770 * free inode in the iag.
1773 * 1771 *
1774 * PRE CONDITION: Already have AG lock for this AG. 1772 * PRE CONDITION: Already have AG lock for this AG.
1775 * 1773 *
1776 * PARAMETERS: 1774 * PARAMETERS:
1777 * imap - pointer to inode map control structure. 1775 * imap - pointer to inode map control structure.
1778 * agno - allocation group. 1776 * agno - allocation group.
1779 * ip - pointer to new inode to be filled in on successful return 1777 * ip - pointer to new inode to be filled in on successful return
1780 * with the disk inode number allocated, its extent address 1778 * with the disk inode number allocated, its extent address
1781 * and the start of the ag. 1779 * and the start of the ag.
1782 * 1780 *
1783 * RETURN VALUES: 1781 * RETURN VALUES:
1784 * 0 - success. 1782 * 0 - success.
1785 * -ENOSPC - insufficient disk resources. 1783 * -ENOSPC - insufficient disk resources.
1786 * -EIO - i/o error. 1784 * -EIO - i/o error.
1787 */ 1785 */
1788 static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) 1786 static int diAllocIno(struct inomap * imap, int agno, struct inode *ip)
1789 { 1787 {
1790 int iagno, ino, rc, rem, extno, sword; 1788 int iagno, ino, rc, rem, extno, sword;
1791 struct metapage *mp; 1789 struct metapage *mp;
1792 struct iag *iagp; 1790 struct iag *iagp;
1793 1791
1794 /* check if there are iags on the ag's free inode list. 1792 /* check if there are iags on the ag's free inode list.
1795 */ 1793 */
1796 if ((iagno = imap->im_agctl[agno].inofree) < 0) 1794 if ((iagno = imap->im_agctl[agno].inofree) < 0)
1797 return -ENOSPC; 1795 return -ENOSPC;
1798 1796
1799 /* obtain read lock on imap inode */ 1797 /* obtain read lock on imap inode */
1800 IREAD_LOCK(imap->im_ipimap, RDWRLOCK_IMAP); 1798 IREAD_LOCK(imap->im_ipimap, RDWRLOCK_IMAP);
1801 1799
1802 /* read the iag at the head of the list. 1800 /* read the iag at the head of the list.
1803 */ 1801 */
1804 if ((rc = diIAGRead(imap, iagno, &mp))) { 1802 if ((rc = diIAGRead(imap, iagno, &mp))) {
1805 IREAD_UNLOCK(imap->im_ipimap); 1803 IREAD_UNLOCK(imap->im_ipimap);
1806 return (rc); 1804 return (rc);
1807 } 1805 }
1808 iagp = (struct iag *) mp->data; 1806 iagp = (struct iag *) mp->data;
1809 1807
1810 /* better be free inodes in this iag if it is on the 1808 /* better be free inodes in this iag if it is on the
1811 * list. 1809 * list.
1812 */ 1810 */
1813 if (!iagp->nfreeinos) { 1811 if (!iagp->nfreeinos) {
1814 IREAD_UNLOCK(imap->im_ipimap); 1812 IREAD_UNLOCK(imap->im_ipimap);
1815 release_metapage(mp); 1813 release_metapage(mp);
1816 jfs_error(ip->i_sb, 1814 jfs_error(ip->i_sb,
1817 "diAllocIno: nfreeinos = 0, but iag on freelist"); 1815 "diAllocIno: nfreeinos = 0, but iag on freelist");
1818 return -EIO; 1816 return -EIO;
1819 } 1817 }
1820 1818
1821 /* scan the free inode summary map to find an extent 1819 /* scan the free inode summary map to find an extent
1822 * with free inodes. 1820 * with free inodes.
1823 */ 1821 */
1824 for (sword = 0;; sword++) { 1822 for (sword = 0;; sword++) {
1825 if (sword >= SMAPSZ) { 1823 if (sword >= SMAPSZ) {
1826 IREAD_UNLOCK(imap->im_ipimap); 1824 IREAD_UNLOCK(imap->im_ipimap);
1827 release_metapage(mp); 1825 release_metapage(mp);
1828 jfs_error(ip->i_sb, 1826 jfs_error(ip->i_sb,
1829 "diAllocIno: free inode not found in summary map"); 1827 "diAllocIno: free inode not found in summary map");
1830 return -EIO; 1828 return -EIO;
1831 } 1829 }
1832 1830
1833 if (~iagp->inosmap[sword]) 1831 if (~iagp->inosmap[sword])
1834 break; 1832 break;
1835 } 1833 }
1836 1834
1837 /* found a extent with free inodes. determine 1835 /* found a extent with free inodes. determine
1838 * the extent number. 1836 * the extent number.
1839 */ 1837 */
1840 rem = diFindFree(le32_to_cpu(iagp->inosmap[sword]), 0); 1838 rem = diFindFree(le32_to_cpu(iagp->inosmap[sword]), 0);
1841 if (rem >= EXTSPERSUM) { 1839 if (rem >= EXTSPERSUM) {
1842 IREAD_UNLOCK(imap->im_ipimap); 1840 IREAD_UNLOCK(imap->im_ipimap);
1843 release_metapage(mp); 1841 release_metapage(mp);
1844 jfs_error(ip->i_sb, "diAllocIno: no free extent found"); 1842 jfs_error(ip->i_sb, "diAllocIno: no free extent found");
1845 return -EIO; 1843 return -EIO;
1846 } 1844 }
1847 extno = (sword << L2EXTSPERSUM) + rem; 1845 extno = (sword << L2EXTSPERSUM) + rem;
1848 1846
1849 /* find the first free inode in the extent. 1847 /* find the first free inode in the extent.
1850 */ 1848 */
1851 rem = diFindFree(le32_to_cpu(iagp->wmap[extno]), 0); 1849 rem = diFindFree(le32_to_cpu(iagp->wmap[extno]), 0);
1852 if (rem >= INOSPEREXT) { 1850 if (rem >= INOSPEREXT) {
1853 IREAD_UNLOCK(imap->im_ipimap); 1851 IREAD_UNLOCK(imap->im_ipimap);
1854 release_metapage(mp); 1852 release_metapage(mp);
1855 jfs_error(ip->i_sb, "diAllocIno: free inode not found"); 1853 jfs_error(ip->i_sb, "diAllocIno: free inode not found");
1856 return -EIO; 1854 return -EIO;
1857 } 1855 }
1858 1856
1859 /* compute the inode number within the iag. 1857 /* compute the inode number within the iag.
1860 */ 1858 */
1861 ino = (extno << L2INOSPEREXT) + rem; 1859 ino = (extno << L2INOSPEREXT) + rem;
1862 1860
1863 /* allocate the inode. 1861 /* allocate the inode.
1864 */ 1862 */
1865 rc = diAllocBit(imap, iagp, ino); 1863 rc = diAllocBit(imap, iagp, ino);
1866 IREAD_UNLOCK(imap->im_ipimap); 1864 IREAD_UNLOCK(imap->im_ipimap);
1867 if (rc) { 1865 if (rc) {
1868 release_metapage(mp); 1866 release_metapage(mp);
1869 return (rc); 1867 return (rc);
1870 } 1868 }
1871 1869
1872 /* set the results of the allocation and write the iag. 1870 /* set the results of the allocation and write the iag.
1873 */ 1871 */
1874 diInitInode(ip, iagno, ino, extno, iagp); 1872 diInitInode(ip, iagno, ino, extno, iagp);
1875 write_metapage(mp); 1873 write_metapage(mp);
1876 1874
1877 return (0); 1875 return (0);
1878 } 1876 }
1879 1877
1880 1878
1881 /* 1879 /*
1882 * NAME: diAllocExt(imap,agno,ip) 1880 * NAME: diAllocExt(imap,agno,ip)
1883 * 1881 *
1884 * FUNCTION: add a new extent of free inodes to an iag, allocating 1882 * FUNCTION: add a new extent of free inodes to an iag, allocating
1885 * an inode from this extent to satisfy the current allocation 1883 * an inode from this extent to satisfy the current allocation
1886 * request. 1884 * request.
1887 * 1885 *
1888 * this routine first tries to find an existing iag with free 1886 * this routine first tries to find an existing iag with free
1889 * extents through the ag free extent list. if list is not 1887 * extents through the ag free extent list. if list is not
1890 * empty, the head of the list will be selected as the home 1888 * empty, the head of the list will be selected as the home
1891 * of the new extent of free inodes. otherwise (the list is 1889 * of the new extent of free inodes. otherwise (the list is
1892 * empty), a new iag will be allocated for the ag to contain 1890 * empty), a new iag will be allocated for the ag to contain
1893 * the extent. 1891 * the extent.
1894 * 1892 *
1895 * once an iag has been selected, the free extent summary map 1893 * once an iag has been selected, the free extent summary map
1896 * is used to locate a free extent within the iag and diNewExt() 1894 * is used to locate a free extent within the iag and diNewExt()
1897 * is called to initialize the extent, with initialization 1895 * is called to initialize the extent, with initialization
1898 * including the allocation of the first inode of the extent 1896 * including the allocation of the first inode of the extent
1899 * for the purpose of satisfying this request. 1897 * for the purpose of satisfying this request.
1900 * 1898 *
1901 * PARAMETERS: 1899 * PARAMETERS:
1902 * imap - pointer to inode map control structure. 1900 * imap - pointer to inode map control structure.
1903 * agno - allocation group number. 1901 * agno - allocation group number.
1904 * ip - pointer to new inode to be filled in on successful return 1902 * ip - pointer to new inode to be filled in on successful return
1905 * with the disk inode number allocated, its extent address 1903 * with the disk inode number allocated, its extent address
1906 * and the start of the ag. 1904 * and the start of the ag.
1907 * 1905 *
1908 * RETURN VALUES: 1906 * RETURN VALUES:
1909 * 0 - success. 1907 * 0 - success.
1910 * -ENOSPC - insufficient disk resources. 1908 * -ENOSPC - insufficient disk resources.
1911 * -EIO - i/o error. 1909 * -EIO - i/o error.
1912 */ 1910 */
1913 static int diAllocExt(struct inomap * imap, int agno, struct inode *ip) 1911 static int diAllocExt(struct inomap * imap, int agno, struct inode *ip)
1914 { 1912 {
1915 int rem, iagno, sword, extno, rc; 1913 int rem, iagno, sword, extno, rc;
1916 struct metapage *mp; 1914 struct metapage *mp;
1917 struct iag *iagp; 1915 struct iag *iagp;
1918 1916
1919 /* check if the ag has any iags with free extents. if not, 1917 /* check if the ag has any iags with free extents. if not,
1920 * allocate a new iag for the ag. 1918 * allocate a new iag for the ag.
1921 */ 1919 */
1922 if ((iagno = imap->im_agctl[agno].extfree) < 0) { 1920 if ((iagno = imap->im_agctl[agno].extfree) < 0) {
1923 /* If successful, diNewIAG will obtain the read lock on the 1921 /* If successful, diNewIAG will obtain the read lock on the
1924 * imap inode. 1922 * imap inode.
1925 */ 1923 */
1926 if ((rc = diNewIAG(imap, &iagno, agno, &mp))) { 1924 if ((rc = diNewIAG(imap, &iagno, agno, &mp))) {
1927 return (rc); 1925 return (rc);
1928 } 1926 }
1929 iagp = (struct iag *) mp->data; 1927 iagp = (struct iag *) mp->data;
1930 1928
1931 /* set the ag number if this a brand new iag 1929 /* set the ag number if this a brand new iag
1932 */ 1930 */
1933 iagp->agstart = 1931 iagp->agstart =
1934 cpu_to_le64(AGTOBLK(agno, imap->im_ipimap)); 1932 cpu_to_le64(AGTOBLK(agno, imap->im_ipimap));
1935 } else { 1933 } else {
1936 /* read the iag. 1934 /* read the iag.
1937 */ 1935 */
1938 IREAD_LOCK(imap->im_ipimap, RDWRLOCK_IMAP); 1936 IREAD_LOCK(imap->im_ipimap, RDWRLOCK_IMAP);
1939 if ((rc = diIAGRead(imap, iagno, &mp))) { 1937 if ((rc = diIAGRead(imap, iagno, &mp))) {
1940 IREAD_UNLOCK(imap->im_ipimap); 1938 IREAD_UNLOCK(imap->im_ipimap);
1941 jfs_error(ip->i_sb, "diAllocExt: error reading iag"); 1939 jfs_error(ip->i_sb, "diAllocExt: error reading iag");
1942 return rc; 1940 return rc;
1943 } 1941 }
1944 iagp = (struct iag *) mp->data; 1942 iagp = (struct iag *) mp->data;
1945 } 1943 }
1946 1944
1947 /* using the free extent summary map, find a free extent. 1945 /* using the free extent summary map, find a free extent.
1948 */ 1946 */
1949 for (sword = 0;; sword++) { 1947 for (sword = 0;; sword++) {
1950 if (sword >= SMAPSZ) { 1948 if (sword >= SMAPSZ) {
1951 release_metapage(mp); 1949 release_metapage(mp);
1952 IREAD_UNLOCK(imap->im_ipimap); 1950 IREAD_UNLOCK(imap->im_ipimap);
1953 jfs_error(ip->i_sb, 1951 jfs_error(ip->i_sb,
1954 "diAllocExt: free ext summary map not found"); 1952 "diAllocExt: free ext summary map not found");
1955 return -EIO; 1953 return -EIO;
1956 } 1954 }
1957 if (~iagp->extsmap[sword]) 1955 if (~iagp->extsmap[sword])
1958 break; 1956 break;
1959 } 1957 }
1960 1958
1961 /* determine the extent number of the free extent. 1959 /* determine the extent number of the free extent.
1962 */ 1960 */
1963 rem = diFindFree(le32_to_cpu(iagp->extsmap[sword]), 0); 1961 rem = diFindFree(le32_to_cpu(iagp->extsmap[sword]), 0);
1964 if (rem >= EXTSPERSUM) { 1962 if (rem >= EXTSPERSUM) {
1965 release_metapage(mp); 1963 release_metapage(mp);
1966 IREAD_UNLOCK(imap->im_ipimap); 1964 IREAD_UNLOCK(imap->im_ipimap);
1967 jfs_error(ip->i_sb, "diAllocExt: free extent not found"); 1965 jfs_error(ip->i_sb, "diAllocExt: free extent not found");
1968 return -EIO; 1966 return -EIO;
1969 } 1967 }
1970 extno = (sword << L2EXTSPERSUM) + rem; 1968 extno = (sword << L2EXTSPERSUM) + rem;
1971 1969
1972 /* initialize the new extent. 1970 /* initialize the new extent.
1973 */ 1971 */
1974 rc = diNewExt(imap, iagp, extno); 1972 rc = diNewExt(imap, iagp, extno);
1975 IREAD_UNLOCK(imap->im_ipimap); 1973 IREAD_UNLOCK(imap->im_ipimap);
1976 if (rc) { 1974 if (rc) {
1977 /* something bad happened. if a new iag was allocated, 1975 /* something bad happened. if a new iag was allocated,
1978 * place it back on the inode map's iag free list, and 1976 * place it back on the inode map's iag free list, and
1979 * clear the ag number information. 1977 * clear the ag number information.
1980 */ 1978 */
1981 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { 1979 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
1982 IAGFREE_LOCK(imap); 1980 IAGFREE_LOCK(imap);
1983 iagp->iagfree = cpu_to_le32(imap->im_freeiag); 1981 iagp->iagfree = cpu_to_le32(imap->im_freeiag);
1984 imap->im_freeiag = iagno; 1982 imap->im_freeiag = iagno;
1985 IAGFREE_UNLOCK(imap); 1983 IAGFREE_UNLOCK(imap);
1986 } 1984 }
1987 write_metapage(mp); 1985 write_metapage(mp);
1988 return (rc); 1986 return (rc);
1989 } 1987 }
1990 1988
1991 /* set the results of the allocation and write the iag. 1989 /* set the results of the allocation and write the iag.
1992 */ 1990 */
1993 diInitInode(ip, iagno, extno << L2INOSPEREXT, extno, iagp); 1991 diInitInode(ip, iagno, extno << L2INOSPEREXT, extno, iagp);
1994 1992
1995 write_metapage(mp); 1993 write_metapage(mp);
1996 1994
1997 return (0); 1995 return (0);
1998 } 1996 }
1999 1997
2000 1998
2001 /* 1999 /*
2002 * NAME: diAllocBit(imap,iagp,ino) 2000 * NAME: diAllocBit(imap,iagp,ino)
2003 * 2001 *
2004 * FUNCTION: allocate a backed inode from an iag. 2002 * FUNCTION: allocate a backed inode from an iag.
2005 * 2003 *
2006 * this routine performs the mechanics of allocating a 2004 * this routine performs the mechanics of allocating a
2007 * specified inode from a backed extent. 2005 * specified inode from a backed extent.
2008 * 2006 *
2009 * if the inode to be allocated represents the last free 2007 * if the inode to be allocated represents the last free
2010 * inode within the iag, the iag will be removed from the 2008 * inode within the iag, the iag will be removed from the
2011 * ag free inode list. 2009 * ag free inode list.
2012 * 2010 *
2013 * a careful update approach is used to provide consistency 2011 * a careful update approach is used to provide consistency
2014 * in the face of updates to multiple buffers. under this 2012 * in the face of updates to multiple buffers. under this
2015 * approach, all required buffers are obtained before making 2013 * approach, all required buffers are obtained before making
2016 * any updates and are held all are updates are complete. 2014 * any updates and are held all are updates are complete.
2017 * 2015 *
2018 * PRE CONDITION: Already have buffer lock on iagp. Already have AG lock on 2016 * PRE CONDITION: Already have buffer lock on iagp. Already have AG lock on
2019 * this AG. Must have read lock on imap inode. 2017 * this AG. Must have read lock on imap inode.
2020 * 2018 *
2021 * PARAMETERS: 2019 * PARAMETERS:
2022 * imap - pointer to inode map control structure. 2020 * imap - pointer to inode map control structure.
2023 * iagp - pointer to iag. 2021 * iagp - pointer to iag.
2024 * ino - inode number to be allocated within the iag. 2022 * ino - inode number to be allocated within the iag.
2025 * 2023 *
2026 * RETURN VALUES: 2024 * RETURN VALUES:
2027 * 0 - success. 2025 * 0 - success.
2028 * -ENOSPC - insufficient disk resources. 2026 * -ENOSPC - insufficient disk resources.
2029 * -EIO - i/o error. 2027 * -EIO - i/o error.
2030 */ 2028 */
2031 static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino) 2029 static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino)
2032 { 2030 {
2033 int extno, bitno, agno, sword, rc; 2031 int extno, bitno, agno, sword, rc;
2034 struct metapage *amp = NULL, *bmp = NULL; 2032 struct metapage *amp = NULL, *bmp = NULL;
2035 struct iag *aiagp = NULL, *biagp = NULL; 2033 struct iag *aiagp = NULL, *biagp = NULL;
2036 u32 mask; 2034 u32 mask;
2037 2035
2038 /* check if this is the last free inode within the iag. 2036 /* check if this is the last free inode within the iag.
2039 * if so, it will have to be removed from the ag free 2037 * if so, it will have to be removed from the ag free
2040 * inode list, so get the iags preceeding and following 2038 * inode list, so get the iags preceeding and following
2041 * it on the list. 2039 * it on the list.
2042 */ 2040 */
2043 if (iagp->nfreeinos == cpu_to_le32(1)) { 2041 if (iagp->nfreeinos == cpu_to_le32(1)) {
2044 if ((int) le32_to_cpu(iagp->inofreefwd) >= 0) { 2042 if ((int) le32_to_cpu(iagp->inofreefwd) >= 0) {
2045 if ((rc = 2043 if ((rc =
2046 diIAGRead(imap, le32_to_cpu(iagp->inofreefwd), 2044 diIAGRead(imap, le32_to_cpu(iagp->inofreefwd),
2047 &amp))) 2045 &amp)))
2048 return (rc); 2046 return (rc);
2049 aiagp = (struct iag *) amp->data; 2047 aiagp = (struct iag *) amp->data;
2050 } 2048 }
2051 2049
2052 if ((int) le32_to_cpu(iagp->inofreeback) >= 0) { 2050 if ((int) le32_to_cpu(iagp->inofreeback) >= 0) {
2053 if ((rc = 2051 if ((rc =
2054 diIAGRead(imap, 2052 diIAGRead(imap,
2055 le32_to_cpu(iagp->inofreeback), 2053 le32_to_cpu(iagp->inofreeback),
2056 &bmp))) { 2054 &bmp))) {
2057 if (amp) 2055 if (amp)
2058 release_metapage(amp); 2056 release_metapage(amp);
2059 return (rc); 2057 return (rc);
2060 } 2058 }
2061 biagp = (struct iag *) bmp->data; 2059 biagp = (struct iag *) bmp->data;
2062 } 2060 }
2063 } 2061 }
2064 2062
2065 /* get the ag number, extent number, inode number within 2063 /* get the ag number, extent number, inode number within
2066 * the extent. 2064 * the extent.
2067 */ 2065 */
2068 agno = BLKTOAG(le64_to_cpu(iagp->agstart), JFS_SBI(imap->im_ipimap->i_sb)); 2066 agno = BLKTOAG(le64_to_cpu(iagp->agstart), JFS_SBI(imap->im_ipimap->i_sb));
2069 extno = ino >> L2INOSPEREXT; 2067 extno = ino >> L2INOSPEREXT;
2070 bitno = ino & (INOSPEREXT - 1); 2068 bitno = ino & (INOSPEREXT - 1);
2071 2069
2072 /* compute the mask for setting the map. 2070 /* compute the mask for setting the map.
2073 */ 2071 */
2074 mask = HIGHORDER >> bitno; 2072 mask = HIGHORDER >> bitno;
2075 2073
2076 /* the inode should be free and backed. 2074 /* the inode should be free and backed.
2077 */ 2075 */
2078 if (((le32_to_cpu(iagp->pmap[extno]) & mask) != 0) || 2076 if (((le32_to_cpu(iagp->pmap[extno]) & mask) != 0) ||
2079 ((le32_to_cpu(iagp->wmap[extno]) & mask) != 0) || 2077 ((le32_to_cpu(iagp->wmap[extno]) & mask) != 0) ||
2080 (addressPXD(&iagp->inoext[extno]) == 0)) { 2078 (addressPXD(&iagp->inoext[extno]) == 0)) {
2081 if (amp) 2079 if (amp)
2082 release_metapage(amp); 2080 release_metapage(amp);
2083 if (bmp) 2081 if (bmp)
2084 release_metapage(bmp); 2082 release_metapage(bmp);
2085 2083
2086 jfs_error(imap->im_ipimap->i_sb, 2084 jfs_error(imap->im_ipimap->i_sb,
2087 "diAllocBit: iag inconsistent"); 2085 "diAllocBit: iag inconsistent");
2088 return -EIO; 2086 return -EIO;
2089 } 2087 }
2090 2088
2091 /* mark the inode as allocated in the working map. 2089 /* mark the inode as allocated in the working map.
2092 */ 2090 */
2093 iagp->wmap[extno] |= cpu_to_le32(mask); 2091 iagp->wmap[extno] |= cpu_to_le32(mask);
2094 2092
2095 /* check if all inodes within the extent are now 2093 /* check if all inodes within the extent are now
2096 * allocated. if so, update the free inode summary 2094 * allocated. if so, update the free inode summary
2097 * map to reflect this. 2095 * map to reflect this.
2098 */ 2096 */
2099 if (iagp->wmap[extno] == cpu_to_le32(ONES)) { 2097 if (iagp->wmap[extno] == cpu_to_le32(ONES)) {
2100 sword = extno >> L2EXTSPERSUM; 2098 sword = extno >> L2EXTSPERSUM;
2101 bitno = extno & (EXTSPERSUM - 1); 2099 bitno = extno & (EXTSPERSUM - 1);
2102 iagp->inosmap[sword] |= cpu_to_le32(HIGHORDER >> bitno); 2100 iagp->inosmap[sword] |= cpu_to_le32(HIGHORDER >> bitno);
2103 } 2101 }
2104 2102
2105 /* if this was the last free inode in the iag, remove the 2103 /* if this was the last free inode in the iag, remove the
2106 * iag from the ag free inode list. 2104 * iag from the ag free inode list.
2107 */ 2105 */
2108 if (iagp->nfreeinos == cpu_to_le32(1)) { 2106 if (iagp->nfreeinos == cpu_to_le32(1)) {
2109 if (amp) { 2107 if (amp) {
2110 aiagp->inofreeback = iagp->inofreeback; 2108 aiagp->inofreeback = iagp->inofreeback;
2111 write_metapage(amp); 2109 write_metapage(amp);
2112 } 2110 }
2113 2111
2114 if (bmp) { 2112 if (bmp) {
2115 biagp->inofreefwd = iagp->inofreefwd; 2113 biagp->inofreefwd = iagp->inofreefwd;
2116 write_metapage(bmp); 2114 write_metapage(bmp);
2117 } else { 2115 } else {
2118 imap->im_agctl[agno].inofree = 2116 imap->im_agctl[agno].inofree =
2119 le32_to_cpu(iagp->inofreefwd); 2117 le32_to_cpu(iagp->inofreefwd);
2120 } 2118 }
2121 iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1); 2119 iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1);
2122 } 2120 }
2123 2121
2124 /* update the free inode count at the iag, ag, inode 2122 /* update the free inode count at the iag, ag, inode
2125 * map levels. 2123 * map levels.
2126 */ 2124 */
2127 iagp->nfreeinos = cpu_to_le32(le32_to_cpu(iagp->nfreeinos) - 1); 2125 le32_add_cpu(&iagp->nfreeinos, -1);
2128 imap->im_agctl[agno].numfree -= 1; 2126 imap->im_agctl[agno].numfree -= 1;
2129 atomic_dec(&imap->im_numfree); 2127 atomic_dec(&imap->im_numfree);
2130 2128
2131 return (0); 2129 return (0);
2132 } 2130 }
2133 2131
2134 2132
2135 /* 2133 /*
2136 * NAME: diNewExt(imap,iagp,extno) 2134 * NAME: diNewExt(imap,iagp,extno)
2137 * 2135 *
2138 * FUNCTION: initialize a new extent of inodes for an iag, allocating 2136 * FUNCTION: initialize a new extent of inodes for an iag, allocating
2139 * the first inode of the extent for use for the current 2137 * the first inode of the extent for use for the current
2140 * allocation request. 2138 * allocation request.
2141 * 2139 *
2142 * disk resources are allocated for the new extent of inodes 2140 * disk resources are allocated for the new extent of inodes
2143 * and the inodes themselves are initialized to reflect their 2141 * and the inodes themselves are initialized to reflect their
2144 * existence within the extent (i.e. their inode numbers and 2142 * existence within the extent (i.e. their inode numbers and
2145 * inode extent addresses are set) and their initial state 2143 * inode extent addresses are set) and their initial state
2146 * (mode and link count are set to zero). 2144 * (mode and link count are set to zero).
2147 * 2145 *
2148 * if the iag is new, it is not yet on an ag extent free list 2146 * if the iag is new, it is not yet on an ag extent free list
2149 * but will now be placed on this list. 2147 * but will now be placed on this list.
2150 * 2148 *
2151 * if the allocation of the new extent causes the iag to 2149 * if the allocation of the new extent causes the iag to
2152 * have no free extent, the iag will be removed from the 2150 * have no free extent, the iag will be removed from the
2153 * ag extent free list. 2151 * ag extent free list.
2154 * 2152 *
2155 * if the iag has no free backed inodes, it will be placed 2153 * if the iag has no free backed inodes, it will be placed
2156 * on the ag free inode list, since the addition of the new 2154 * on the ag free inode list, since the addition of the new
2157 * extent will now cause it to have free inodes. 2155 * extent will now cause it to have free inodes.
2158 * 2156 *
2159 * a careful update approach is used to provide consistency 2157 * a careful update approach is used to provide consistency
2160 * (i.e. list consistency) in the face of updates to multiple 2158 * (i.e. list consistency) in the face of updates to multiple
2161 * buffers. under this approach, all required buffers are 2159 * buffers. under this approach, all required buffers are
2162 * obtained before making any updates and are held until all 2160 * obtained before making any updates and are held until all
2163 * updates are complete. 2161 * updates are complete.
2164 * 2162 *
2165 * PRE CONDITION: Already have buffer lock on iagp. Already have AG lock on 2163 * PRE CONDITION: Already have buffer lock on iagp. Already have AG lock on
2166 * this AG. Must have read lock on imap inode. 2164 * this AG. Must have read lock on imap inode.
2167 * 2165 *
2168 * PARAMETERS: 2166 * PARAMETERS:
2169 * imap - pointer to inode map control structure. 2167 * imap - pointer to inode map control structure.
2170 * iagp - pointer to iag. 2168 * iagp - pointer to iag.
2171 * extno - extent number. 2169 * extno - extent number.
2172 * 2170 *
2173 * RETURN VALUES: 2171 * RETURN VALUES:
2174 * 0 - success. 2172 * 0 - success.
2175 * -ENOSPC - insufficient disk resources. 2173 * -ENOSPC - insufficient disk resources.
2176 * -EIO - i/o error. 2174 * -EIO - i/o error.
2177 */ 2175 */
2178 static int diNewExt(struct inomap * imap, struct iag * iagp, int extno) 2176 static int diNewExt(struct inomap * imap, struct iag * iagp, int extno)
2179 { 2177 {
2180 int agno, iagno, fwd, back, freei = 0, sword, rc; 2178 int agno, iagno, fwd, back, freei = 0, sword, rc;
2181 struct iag *aiagp = NULL, *biagp = NULL, *ciagp = NULL; 2179 struct iag *aiagp = NULL, *biagp = NULL, *ciagp = NULL;
2182 struct metapage *amp, *bmp, *cmp, *dmp; 2180 struct metapage *amp, *bmp, *cmp, *dmp;
2183 struct inode *ipimap; 2181 struct inode *ipimap;
2184 s64 blkno, hint; 2182 s64 blkno, hint;
2185 int i, j; 2183 int i, j;
2186 u32 mask; 2184 u32 mask;
2187 ino_t ino; 2185 ino_t ino;
2188 struct dinode *dp; 2186 struct dinode *dp;
2189 struct jfs_sb_info *sbi; 2187 struct jfs_sb_info *sbi;
2190 2188
2191 /* better have free extents. 2189 /* better have free extents.
2192 */ 2190 */
2193 if (!iagp->nfreeexts) { 2191 if (!iagp->nfreeexts) {
2194 jfs_error(imap->im_ipimap->i_sb, "diNewExt: no free extents"); 2192 jfs_error(imap->im_ipimap->i_sb, "diNewExt: no free extents");
2195 return -EIO; 2193 return -EIO;
2196 } 2194 }
2197 2195
2198 /* get the inode map inode. 2196 /* get the inode map inode.
2199 */ 2197 */
2200 ipimap = imap->im_ipimap; 2198 ipimap = imap->im_ipimap;
2201 sbi = JFS_SBI(ipimap->i_sb); 2199 sbi = JFS_SBI(ipimap->i_sb);
2202 2200
2203 amp = bmp = cmp = NULL; 2201 amp = bmp = cmp = NULL;
2204 2202
2205 /* get the ag and iag numbers for this iag. 2203 /* get the ag and iag numbers for this iag.
2206 */ 2204 */
2207 agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi); 2205 agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi);
2208 iagno = le32_to_cpu(iagp->iagnum); 2206 iagno = le32_to_cpu(iagp->iagnum);
2209 2207
2210 /* check if this is the last free extent within the 2208 /* check if this is the last free extent within the
2211 * iag. if so, the iag must be removed from the ag 2209 * iag. if so, the iag must be removed from the ag
2212 * free extent list, so get the iags preceeding and 2210 * free extent list, so get the iags preceeding and
2213 * following the iag on this list. 2211 * following the iag on this list.
2214 */ 2212 */
2215 if (iagp->nfreeexts == cpu_to_le32(1)) { 2213 if (iagp->nfreeexts == cpu_to_le32(1)) {
2216 if ((fwd = le32_to_cpu(iagp->extfreefwd)) >= 0) { 2214 if ((fwd = le32_to_cpu(iagp->extfreefwd)) >= 0) {
2217 if ((rc = diIAGRead(imap, fwd, &amp))) 2215 if ((rc = diIAGRead(imap, fwd, &amp)))
2218 return (rc); 2216 return (rc);
2219 aiagp = (struct iag *) amp->data; 2217 aiagp = (struct iag *) amp->data;
2220 } 2218 }
2221 2219
2222 if ((back = le32_to_cpu(iagp->extfreeback)) >= 0) { 2220 if ((back = le32_to_cpu(iagp->extfreeback)) >= 0) {
2223 if ((rc = diIAGRead(imap, back, &bmp))) 2221 if ((rc = diIAGRead(imap, back, &bmp)))
2224 goto error_out; 2222 goto error_out;
2225 biagp = (struct iag *) bmp->data; 2223 biagp = (struct iag *) bmp->data;
2226 } 2224 }
2227 } else { 2225 } else {
2228 /* the iag has free extents. if all extents are free 2226 /* the iag has free extents. if all extents are free
2229 * (as is the case for a newly allocated iag), the iag 2227 * (as is the case for a newly allocated iag), the iag
2230 * must be added to the ag free extent list, so get 2228 * must be added to the ag free extent list, so get
2231 * the iag at the head of the list in preparation for 2229 * the iag at the head of the list in preparation for
2232 * adding this iag to this list. 2230 * adding this iag to this list.
2233 */ 2231 */
2234 fwd = back = -1; 2232 fwd = back = -1;
2235 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { 2233 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
2236 if ((fwd = imap->im_agctl[agno].extfree) >= 0) { 2234 if ((fwd = imap->im_agctl[agno].extfree) >= 0) {
2237 if ((rc = diIAGRead(imap, fwd, &amp))) 2235 if ((rc = diIAGRead(imap, fwd, &amp)))
2238 goto error_out; 2236 goto error_out;
2239 aiagp = (struct iag *) amp->data; 2237 aiagp = (struct iag *) amp->data;
2240 } 2238 }
2241 } 2239 }
2242 } 2240 }
2243 2241
2244 /* check if the iag has no free inodes. if so, the iag 2242 /* check if the iag has no free inodes. if so, the iag
2245 * will have to be added to the ag free inode list, so get 2243 * will have to be added to the ag free inode list, so get
2246 * the iag at the head of the list in preparation for 2244 * the iag at the head of the list in preparation for
2247 * adding this iag to this list. in doing this, we must 2245 * adding this iag to this list. in doing this, we must
2248 * check if we already have the iag at the head of 2246 * check if we already have the iag at the head of
2249 * the list in hand. 2247 * the list in hand.
2250 */ 2248 */
2251 if (iagp->nfreeinos == 0) { 2249 if (iagp->nfreeinos == 0) {
2252 freei = imap->im_agctl[agno].inofree; 2250 freei = imap->im_agctl[agno].inofree;
2253 2251
2254 if (freei >= 0) { 2252 if (freei >= 0) {
2255 if (freei == fwd) { 2253 if (freei == fwd) {
2256 ciagp = aiagp; 2254 ciagp = aiagp;
2257 } else if (freei == back) { 2255 } else if (freei == back) {
2258 ciagp = biagp; 2256 ciagp = biagp;
2259 } else { 2257 } else {
2260 if ((rc = diIAGRead(imap, freei, &cmp))) 2258 if ((rc = diIAGRead(imap, freei, &cmp)))
2261 goto error_out; 2259 goto error_out;
2262 ciagp = (struct iag *) cmp->data; 2260 ciagp = (struct iag *) cmp->data;
2263 } 2261 }
2264 if (ciagp == NULL) { 2262 if (ciagp == NULL) {
2265 jfs_error(imap->im_ipimap->i_sb, 2263 jfs_error(imap->im_ipimap->i_sb,
2266 "diNewExt: ciagp == NULL"); 2264 "diNewExt: ciagp == NULL");
2267 rc = -EIO; 2265 rc = -EIO;
2268 goto error_out; 2266 goto error_out;
2269 } 2267 }
2270 } 2268 }
2271 } 2269 }
2272 2270
2273 /* allocate disk space for the inode extent. 2271 /* allocate disk space for the inode extent.
2274 */ 2272 */
2275 if ((extno == 0) || (addressPXD(&iagp->inoext[extno - 1]) == 0)) 2273 if ((extno == 0) || (addressPXD(&iagp->inoext[extno - 1]) == 0))
2276 hint = ((s64) agno << sbi->bmap->db_agl2size) - 1; 2274 hint = ((s64) agno << sbi->bmap->db_agl2size) - 1;
2277 else 2275 else
2278 hint = addressPXD(&iagp->inoext[extno - 1]) + 2276 hint = addressPXD(&iagp->inoext[extno - 1]) +
2279 lengthPXD(&iagp->inoext[extno - 1]) - 1; 2277 lengthPXD(&iagp->inoext[extno - 1]) - 1;
2280 2278
2281 if ((rc = dbAlloc(ipimap, hint, (s64) imap->im_nbperiext, &blkno))) 2279 if ((rc = dbAlloc(ipimap, hint, (s64) imap->im_nbperiext, &blkno)))
2282 goto error_out; 2280 goto error_out;
2283 2281
2284 /* compute the inode number of the first inode within the 2282 /* compute the inode number of the first inode within the
2285 * extent. 2283 * extent.
2286 */ 2284 */
2287 ino = (iagno << L2INOSPERIAG) + (extno << L2INOSPEREXT); 2285 ino = (iagno << L2INOSPERIAG) + (extno << L2INOSPEREXT);
2288 2286
2289 /* initialize the inodes within the newly allocated extent a 2287 /* initialize the inodes within the newly allocated extent a
2290 * page at a time. 2288 * page at a time.
2291 */ 2289 */
2292 for (i = 0; i < imap->im_nbperiext; i += sbi->nbperpage) { 2290 for (i = 0; i < imap->im_nbperiext; i += sbi->nbperpage) {
2293 /* get a buffer for this page of disk inodes. 2291 /* get a buffer for this page of disk inodes.
2294 */ 2292 */
2295 dmp = get_metapage(ipimap, blkno + i, PSIZE, 1); 2293 dmp = get_metapage(ipimap, blkno + i, PSIZE, 1);
2296 if (dmp == NULL) { 2294 if (dmp == NULL) {
2297 rc = -EIO; 2295 rc = -EIO;
2298 goto error_out; 2296 goto error_out;
2299 } 2297 }
2300 dp = (struct dinode *) dmp->data; 2298 dp = (struct dinode *) dmp->data;
2301 2299
2302 /* initialize the inode number, mode, link count and 2300 /* initialize the inode number, mode, link count and
2303 * inode extent address. 2301 * inode extent address.
2304 */ 2302 */
2305 for (j = 0; j < INOSPERPAGE; j++, dp++, ino++) { 2303 for (j = 0; j < INOSPERPAGE; j++, dp++, ino++) {
2306 dp->di_inostamp = cpu_to_le32(sbi->inostamp); 2304 dp->di_inostamp = cpu_to_le32(sbi->inostamp);
2307 dp->di_number = cpu_to_le32(ino); 2305 dp->di_number = cpu_to_le32(ino);
2308 dp->di_fileset = cpu_to_le32(FILESYSTEM_I); 2306 dp->di_fileset = cpu_to_le32(FILESYSTEM_I);
2309 dp->di_mode = 0; 2307 dp->di_mode = 0;
2310 dp->di_nlink = 0; 2308 dp->di_nlink = 0;
2311 PXDaddress(&(dp->di_ixpxd), blkno); 2309 PXDaddress(&(dp->di_ixpxd), blkno);
2312 PXDlength(&(dp->di_ixpxd), imap->im_nbperiext); 2310 PXDlength(&(dp->di_ixpxd), imap->im_nbperiext);
2313 } 2311 }
2314 write_metapage(dmp); 2312 write_metapage(dmp);
2315 } 2313 }
2316 2314
2317 /* if this is the last free extent within the iag, remove the 2315 /* if this is the last free extent within the iag, remove the
2318 * iag from the ag free extent list. 2316 * iag from the ag free extent list.
2319 */ 2317 */
2320 if (iagp->nfreeexts == cpu_to_le32(1)) { 2318 if (iagp->nfreeexts == cpu_to_le32(1)) {
2321 if (fwd >= 0) 2319 if (fwd >= 0)
2322 aiagp->extfreeback = iagp->extfreeback; 2320 aiagp->extfreeback = iagp->extfreeback;
2323 2321
2324 if (back >= 0) 2322 if (back >= 0)
2325 biagp->extfreefwd = iagp->extfreefwd; 2323 biagp->extfreefwd = iagp->extfreefwd;
2326 else 2324 else
2327 imap->im_agctl[agno].extfree = 2325 imap->im_agctl[agno].extfree =
2328 le32_to_cpu(iagp->extfreefwd); 2326 le32_to_cpu(iagp->extfreefwd);
2329 2327
2330 iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1); 2328 iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1);
2331 } else { 2329 } else {
2332 /* if the iag has all free extents (newly allocated iag), 2330 /* if the iag has all free extents (newly allocated iag),
2333 * add the iag to the ag free extent list. 2331 * add the iag to the ag free extent list.
2334 */ 2332 */
2335 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { 2333 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
2336 if (fwd >= 0) 2334 if (fwd >= 0)
2337 aiagp->extfreeback = cpu_to_le32(iagno); 2335 aiagp->extfreeback = cpu_to_le32(iagno);
2338 2336
2339 iagp->extfreefwd = cpu_to_le32(fwd); 2337 iagp->extfreefwd = cpu_to_le32(fwd);
2340 iagp->extfreeback = cpu_to_le32(-1); 2338 iagp->extfreeback = cpu_to_le32(-1);
2341 imap->im_agctl[agno].extfree = iagno; 2339 imap->im_agctl[agno].extfree = iagno;
2342 } 2340 }
2343 } 2341 }
2344 2342
2345 /* if the iag has no free inodes, add the iag to the 2343 /* if the iag has no free inodes, add the iag to the
2346 * ag free inode list. 2344 * ag free inode list.
2347 */ 2345 */
2348 if (iagp->nfreeinos == 0) { 2346 if (iagp->nfreeinos == 0) {
2349 if (freei >= 0) 2347 if (freei >= 0)
2350 ciagp->inofreeback = cpu_to_le32(iagno); 2348 ciagp->inofreeback = cpu_to_le32(iagno);
2351 2349
2352 iagp->inofreefwd = 2350 iagp->inofreefwd =
2353 cpu_to_le32(imap->im_agctl[agno].inofree); 2351 cpu_to_le32(imap->im_agctl[agno].inofree);
2354 iagp->inofreeback = cpu_to_le32(-1); 2352 iagp->inofreeback = cpu_to_le32(-1);
2355 imap->im_agctl[agno].inofree = iagno; 2353 imap->im_agctl[agno].inofree = iagno;
2356 } 2354 }
2357 2355
2358 /* initialize the extent descriptor of the extent. */ 2356 /* initialize the extent descriptor of the extent. */
2359 PXDlength(&iagp->inoext[extno], imap->im_nbperiext); 2357 PXDlength(&iagp->inoext[extno], imap->im_nbperiext);
2360 PXDaddress(&iagp->inoext[extno], blkno); 2358 PXDaddress(&iagp->inoext[extno], blkno);
2361 2359
2362 /* initialize the working and persistent map of the extent. 2360 /* initialize the working and persistent map of the extent.
2363 * the working map will be initialized such that 2361 * the working map will be initialized such that
2364 * it indicates the first inode of the extent is allocated. 2362 * it indicates the first inode of the extent is allocated.
2365 */ 2363 */
2366 iagp->wmap[extno] = cpu_to_le32(HIGHORDER); 2364 iagp->wmap[extno] = cpu_to_le32(HIGHORDER);
2367 iagp->pmap[extno] = 0; 2365 iagp->pmap[extno] = 0;
2368 2366
2369 /* update the free inode and free extent summary maps 2367 /* update the free inode and free extent summary maps
2370 * for the extent to indicate the extent has free inodes 2368 * for the extent to indicate the extent has free inodes
2371 * and no longer represents a free extent. 2369 * and no longer represents a free extent.
2372 */ 2370 */
2373 sword = extno >> L2EXTSPERSUM; 2371 sword = extno >> L2EXTSPERSUM;
2374 mask = HIGHORDER >> (extno & (EXTSPERSUM - 1)); 2372 mask = HIGHORDER >> (extno & (EXTSPERSUM - 1));
2375 iagp->extsmap[sword] |= cpu_to_le32(mask); 2373 iagp->extsmap[sword] |= cpu_to_le32(mask);
2376 iagp->inosmap[sword] &= cpu_to_le32(~mask); 2374 iagp->inosmap[sword] &= cpu_to_le32(~mask);
2377 2375
2378 /* update the free inode and free extent counts for the 2376 /* update the free inode and free extent counts for the
2379 * iag. 2377 * iag.
2380 */ 2378 */
2381 iagp->nfreeinos = cpu_to_le32(le32_to_cpu(iagp->nfreeinos) + 2379 le32_add_cpu(&iagp->nfreeinos, (INOSPEREXT - 1));
2382 (INOSPEREXT - 1)); 2380 le32_add_cpu(&iagp->nfreeexts, -1);
2383 iagp->nfreeexts = cpu_to_le32(le32_to_cpu(iagp->nfreeexts) - 1);
2384 2381
2385 /* update the free and backed inode counts for the ag. 2382 /* update the free and backed inode counts for the ag.
2386 */ 2383 */
2387 imap->im_agctl[agno].numfree += (INOSPEREXT - 1); 2384 imap->im_agctl[agno].numfree += (INOSPEREXT - 1);
2388 imap->im_agctl[agno].numinos += INOSPEREXT; 2385 imap->im_agctl[agno].numinos += INOSPEREXT;
2389 2386
2390 /* update the free and backed inode counts for the inode map. 2387 /* update the free and backed inode counts for the inode map.
2391 */ 2388 */
2392 atomic_add(INOSPEREXT - 1, &imap->im_numfree); 2389 atomic_add(INOSPEREXT - 1, &imap->im_numfree);
2393 atomic_add(INOSPEREXT, &imap->im_numinos); 2390 atomic_add(INOSPEREXT, &imap->im_numinos);
2394 2391
2395 /* write the iags. 2392 /* write the iags.
2396 */ 2393 */
2397 if (amp) 2394 if (amp)
2398 write_metapage(amp); 2395 write_metapage(amp);
2399 if (bmp) 2396 if (bmp)
2400 write_metapage(bmp); 2397 write_metapage(bmp);
2401 if (cmp) 2398 if (cmp)
2402 write_metapage(cmp); 2399 write_metapage(cmp);
2403 2400
2404 return (0); 2401 return (0);
2405 2402
2406 error_out: 2403 error_out:
2407 2404
2408 /* release the iags. 2405 /* release the iags.
2409 */ 2406 */
2410 if (amp) 2407 if (amp)
2411 release_metapage(amp); 2408 release_metapage(amp);
2412 if (bmp) 2409 if (bmp)
2413 release_metapage(bmp); 2410 release_metapage(bmp);
2414 if (cmp) 2411 if (cmp)
2415 release_metapage(cmp); 2412 release_metapage(cmp);
2416 2413
2417 return (rc); 2414 return (rc);
2418 } 2415 }
2419 2416
2420 2417
2421 /* 2418 /*
2422 * NAME: diNewIAG(imap,iagnop,agno) 2419 * NAME: diNewIAG(imap,iagnop,agno)
2423 * 2420 *
2424 * FUNCTION: allocate a new iag for an allocation group. 2421 * FUNCTION: allocate a new iag for an allocation group.
2425 * 2422 *
2426 * first tries to allocate the iag from the inode map 2423 * first tries to allocate the iag from the inode map
2427 * iagfree list: 2424 * iagfree list:
2428 * if the list has free iags, the head of the list is removed 2425 * if the list has free iags, the head of the list is removed
2429 * and returned to satisfy the request. 2426 * and returned to satisfy the request.
2430 * if the inode map's iag free list is empty, the inode map 2427 * if the inode map's iag free list is empty, the inode map
2431 * is extended to hold a new iag. this new iag is initialized 2428 * is extended to hold a new iag. this new iag is initialized
2432 * and returned to satisfy the request. 2429 * and returned to satisfy the request.
2433 * 2430 *
2434 * PARAMETERS: 2431 * PARAMETERS:
2435 * imap - pointer to inode map control structure. 2432 * imap - pointer to inode map control structure.
2436 * iagnop - pointer to an iag number set with the number of the 2433 * iagnop - pointer to an iag number set with the number of the
2437 * newly allocated iag upon successful return. 2434 * newly allocated iag upon successful return.
2438 * agno - allocation group number. 2435 * agno - allocation group number.
2439 * bpp - Buffer pointer to be filled in with new IAG's buffer 2436 * bpp - Buffer pointer to be filled in with new IAG's buffer
2440 * 2437 *
2441 * RETURN VALUES: 2438 * RETURN VALUES:
2442 * 0 - success. 2439 * 0 - success.
2443 * -ENOSPC - insufficient disk resources. 2440 * -ENOSPC - insufficient disk resources.
2444 * -EIO - i/o error. 2441 * -EIO - i/o error.
2445 * 2442 *
2446 * serialization: 2443 * serialization:
2447 * AG lock held on entry/exit; 2444 * AG lock held on entry/exit;
2448 * write lock on the map is held inside; 2445 * write lock on the map is held inside;
2449 * read lock on the map is held on successful completion; 2446 * read lock on the map is held on successful completion;
2450 * 2447 *
2451 * note: new iag transaction: 2448 * note: new iag transaction:
2452 * . synchronously write iag; 2449 * . synchronously write iag;
2453 * . write log of xtree and inode of imap; 2450 * . write log of xtree and inode of imap;
2454 * . commit; 2451 * . commit;
2455 * . synchronous write of xtree (right to left, bottom to top); 2452 * . synchronous write of xtree (right to left, bottom to top);
2456 * . at start of logredo(): init in-memory imap with one additional iag page; 2453 * . at start of logredo(): init in-memory imap with one additional iag page;
2457 * . at end of logredo(): re-read imap inode to determine 2454 * . at end of logredo(): re-read imap inode to determine
2458 * new imap size; 2455 * new imap size;
2459 */ 2456 */
2460 static int 2457 static int
2461 diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) 2458 diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
2462 { 2459 {
2463 int rc; 2460 int rc;
2464 int iagno, i, xlen; 2461 int iagno, i, xlen;
2465 struct inode *ipimap; 2462 struct inode *ipimap;
2466 struct super_block *sb; 2463 struct super_block *sb;
2467 struct jfs_sb_info *sbi; 2464 struct jfs_sb_info *sbi;
2468 struct metapage *mp; 2465 struct metapage *mp;
2469 struct iag *iagp; 2466 struct iag *iagp;
2470 s64 xaddr = 0; 2467 s64 xaddr = 0;
2471 s64 blkno; 2468 s64 blkno;
2472 tid_t tid; 2469 tid_t tid;
2473 struct inode *iplist[1]; 2470 struct inode *iplist[1];
2474 2471
2475 /* pick up pointers to the inode map and mount inodes */ 2472 /* pick up pointers to the inode map and mount inodes */
2476 ipimap = imap->im_ipimap; 2473 ipimap = imap->im_ipimap;
2477 sb = ipimap->i_sb; 2474 sb = ipimap->i_sb;
2478 sbi = JFS_SBI(sb); 2475 sbi = JFS_SBI(sb);
2479 2476
2480 /* acquire the free iag lock */ 2477 /* acquire the free iag lock */
2481 IAGFREE_LOCK(imap); 2478 IAGFREE_LOCK(imap);
2482 2479
2483 /* if there are any iags on the inode map free iag list, 2480 /* if there are any iags on the inode map free iag list,
2484 * allocate the iag from the head of the list. 2481 * allocate the iag from the head of the list.
2485 */ 2482 */
2486 if (imap->im_freeiag >= 0) { 2483 if (imap->im_freeiag >= 0) {
2487 /* pick up the iag number at the head of the list */ 2484 /* pick up the iag number at the head of the list */
2488 iagno = imap->im_freeiag; 2485 iagno = imap->im_freeiag;
2489 2486
2490 /* determine the logical block number of the iag */ 2487 /* determine the logical block number of the iag */
2491 blkno = IAGTOLBLK(iagno, sbi->l2nbperpage); 2488 blkno = IAGTOLBLK(iagno, sbi->l2nbperpage);
2492 } else { 2489 } else {
2493 /* no free iags. the inode map will have to be extented 2490 /* no free iags. the inode map will have to be extented
2494 * to include a new iag. 2491 * to include a new iag.
2495 */ 2492 */
2496 2493
2497 /* acquire inode map lock */ 2494 /* acquire inode map lock */
2498 IWRITE_LOCK(ipimap, RDWRLOCK_IMAP); 2495 IWRITE_LOCK(ipimap, RDWRLOCK_IMAP);
2499 2496
2500 if (ipimap->i_size >> L2PSIZE != imap->im_nextiag + 1) { 2497 if (ipimap->i_size >> L2PSIZE != imap->im_nextiag + 1) {
2501 IWRITE_UNLOCK(ipimap); 2498 IWRITE_UNLOCK(ipimap);
2502 IAGFREE_UNLOCK(imap); 2499 IAGFREE_UNLOCK(imap);
2503 jfs_error(imap->im_ipimap->i_sb, 2500 jfs_error(imap->im_ipimap->i_sb,
2504 "diNewIAG: ipimap->i_size is wrong"); 2501 "diNewIAG: ipimap->i_size is wrong");
2505 return -EIO; 2502 return -EIO;
2506 } 2503 }
2507 2504
2508 2505
2509 /* get the next avaliable iag number */ 2506 /* get the next avaliable iag number */
2510 iagno = imap->im_nextiag; 2507 iagno = imap->im_nextiag;
2511 2508
2512 /* make sure that we have not exceeded the maximum inode 2509 /* make sure that we have not exceeded the maximum inode
2513 * number limit. 2510 * number limit.
2514 */ 2511 */
2515 if (iagno > (MAXIAGS - 1)) { 2512 if (iagno > (MAXIAGS - 1)) {
2516 /* release the inode map lock */ 2513 /* release the inode map lock */
2517 IWRITE_UNLOCK(ipimap); 2514 IWRITE_UNLOCK(ipimap);
2518 2515
2519 rc = -ENOSPC; 2516 rc = -ENOSPC;
2520 goto out; 2517 goto out;
2521 } 2518 }
2522 2519
2523 /* 2520 /*
2524 * synchronously append new iag page. 2521 * synchronously append new iag page.
2525 */ 2522 */
2526 /* determine the logical address of iag page to append */ 2523 /* determine the logical address of iag page to append */
2527 blkno = IAGTOLBLK(iagno, sbi->l2nbperpage); 2524 blkno = IAGTOLBLK(iagno, sbi->l2nbperpage);
2528 2525
2529 /* Allocate extent for new iag page */ 2526 /* Allocate extent for new iag page */
2530 xlen = sbi->nbperpage; 2527 xlen = sbi->nbperpage;
2531 if ((rc = dbAlloc(ipimap, 0, (s64) xlen, &xaddr))) { 2528 if ((rc = dbAlloc(ipimap, 0, (s64) xlen, &xaddr))) {
2532 /* release the inode map lock */ 2529 /* release the inode map lock */
2533 IWRITE_UNLOCK(ipimap); 2530 IWRITE_UNLOCK(ipimap);
2534 2531
2535 goto out; 2532 goto out;
2536 } 2533 }
2537 2534
2538 /* 2535 /*
2539 * start transaction of update of the inode map 2536 * start transaction of update of the inode map
2540 * addressing structure pointing to the new iag page; 2537 * addressing structure pointing to the new iag page;
2541 */ 2538 */
2542 tid = txBegin(sb, COMMIT_FORCE); 2539 tid = txBegin(sb, COMMIT_FORCE);
2543 mutex_lock(&JFS_IP(ipimap)->commit_mutex); 2540 mutex_lock(&JFS_IP(ipimap)->commit_mutex);
2544 2541
2545 /* update the inode map addressing structure to point to it */ 2542 /* update the inode map addressing structure to point to it */
2546 if ((rc = 2543 if ((rc =
2547 xtInsert(tid, ipimap, 0, blkno, xlen, &xaddr, 0))) { 2544 xtInsert(tid, ipimap, 0, blkno, xlen, &xaddr, 0))) {
2548 txEnd(tid); 2545 txEnd(tid);
2549 mutex_unlock(&JFS_IP(ipimap)->commit_mutex); 2546 mutex_unlock(&JFS_IP(ipimap)->commit_mutex);
2550 /* Free the blocks allocated for the iag since it was 2547 /* Free the blocks allocated for the iag since it was
2551 * not successfully added to the inode map 2548 * not successfully added to the inode map
2552 */ 2549 */
2553 dbFree(ipimap, xaddr, (s64) xlen); 2550 dbFree(ipimap, xaddr, (s64) xlen);
2554 2551
2555 /* release the inode map lock */ 2552 /* release the inode map lock */
2556 IWRITE_UNLOCK(ipimap); 2553 IWRITE_UNLOCK(ipimap);
2557 2554
2558 goto out; 2555 goto out;
2559 } 2556 }
2560 2557
2561 /* update the inode map's inode to reflect the extension */ 2558 /* update the inode map's inode to reflect the extension */
2562 ipimap->i_size += PSIZE; 2559 ipimap->i_size += PSIZE;
2563 inode_add_bytes(ipimap, PSIZE); 2560 inode_add_bytes(ipimap, PSIZE);
2564 2561
2565 /* assign a buffer for the page */ 2562 /* assign a buffer for the page */
2566 mp = get_metapage(ipimap, blkno, PSIZE, 0); 2563 mp = get_metapage(ipimap, blkno, PSIZE, 0);
2567 if (!mp) { 2564 if (!mp) {
2568 /* 2565 /*
2569 * This is very unlikely since we just created the 2566 * This is very unlikely since we just created the
2570 * extent, but let's try to handle it correctly 2567 * extent, but let's try to handle it correctly
2571 */ 2568 */
2572 xtTruncate(tid, ipimap, ipimap->i_size - PSIZE, 2569 xtTruncate(tid, ipimap, ipimap->i_size - PSIZE,
2573 COMMIT_PWMAP); 2570 COMMIT_PWMAP);
2574 2571
2575 txAbort(tid, 0); 2572 txAbort(tid, 0);
2576 txEnd(tid); 2573 txEnd(tid);
2577 2574
2578 /* release the inode map lock */ 2575 /* release the inode map lock */
2579 IWRITE_UNLOCK(ipimap); 2576 IWRITE_UNLOCK(ipimap);
2580 2577
2581 rc = -EIO; 2578 rc = -EIO;
2582 goto out; 2579 goto out;
2583 } 2580 }
2584 iagp = (struct iag *) mp->data; 2581 iagp = (struct iag *) mp->data;
2585 2582
2586 /* init the iag */ 2583 /* init the iag */
2587 memset(iagp, 0, sizeof(struct iag)); 2584 memset(iagp, 0, sizeof(struct iag));
2588 iagp->iagnum = cpu_to_le32(iagno); 2585 iagp->iagnum = cpu_to_le32(iagno);
2589 iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1); 2586 iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1);
2590 iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1); 2587 iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1);
2591 iagp->iagfree = cpu_to_le32(-1); 2588 iagp->iagfree = cpu_to_le32(-1);
2592 iagp->nfreeinos = 0; 2589 iagp->nfreeinos = 0;
2593 iagp->nfreeexts = cpu_to_le32(EXTSPERIAG); 2590 iagp->nfreeexts = cpu_to_le32(EXTSPERIAG);
2594 2591
2595 /* initialize the free inode summary map (free extent 2592 /* initialize the free inode summary map (free extent
2596 * summary map initialization handled by bzero). 2593 * summary map initialization handled by bzero).
2597 */ 2594 */
2598 for (i = 0; i < SMAPSZ; i++) 2595 for (i = 0; i < SMAPSZ; i++)
2599 iagp->inosmap[i] = cpu_to_le32(ONES); 2596 iagp->inosmap[i] = cpu_to_le32(ONES);
2600 2597
2601 /* 2598 /*
2602 * Write and sync the metapage 2599 * Write and sync the metapage
2603 */ 2600 */
2604 flush_metapage(mp); 2601 flush_metapage(mp);
2605 2602
2606 /* 2603 /*
2607 * txCommit(COMMIT_FORCE) will synchronously write address 2604 * txCommit(COMMIT_FORCE) will synchronously write address
2608 * index pages and inode after commit in careful update order 2605 * index pages and inode after commit in careful update order
2609 * of address index pages (right to left, bottom up); 2606 * of address index pages (right to left, bottom up);
2610 */ 2607 */
2611 iplist[0] = ipimap; 2608 iplist[0] = ipimap;
2612 rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE); 2609 rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE);
2613 2610
2614 txEnd(tid); 2611 txEnd(tid);
2615 mutex_unlock(&JFS_IP(ipimap)->commit_mutex); 2612 mutex_unlock(&JFS_IP(ipimap)->commit_mutex);
2616 2613
2617 duplicateIXtree(sb, blkno, xlen, &xaddr); 2614 duplicateIXtree(sb, blkno, xlen, &xaddr);
2618 2615
2619 /* update the next avaliable iag number */ 2616 /* update the next avaliable iag number */
2620 imap->im_nextiag += 1; 2617 imap->im_nextiag += 1;
2621 2618
2622 /* Add the iag to the iag free list so we don't lose the iag 2619 /* Add the iag to the iag free list so we don't lose the iag
2623 * if a failure happens now. 2620 * if a failure happens now.
2624 */ 2621 */
2625 imap->im_freeiag = iagno; 2622 imap->im_freeiag = iagno;
2626 2623
2627 /* Until we have logredo working, we want the imap inode & 2624 /* Until we have logredo working, we want the imap inode &
2628 * control page to be up to date. 2625 * control page to be up to date.
2629 */ 2626 */
2630 diSync(ipimap); 2627 diSync(ipimap);
2631 2628
2632 /* release the inode map lock */ 2629 /* release the inode map lock */
2633 IWRITE_UNLOCK(ipimap); 2630 IWRITE_UNLOCK(ipimap);
2634 } 2631 }
2635 2632
2636 /* obtain read lock on map */ 2633 /* obtain read lock on map */
2637 IREAD_LOCK(ipimap, RDWRLOCK_IMAP); 2634 IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
2638 2635
2639 /* read the iag */ 2636 /* read the iag */
2640 if ((rc = diIAGRead(imap, iagno, &mp))) { 2637 if ((rc = diIAGRead(imap, iagno, &mp))) {
2641 IREAD_UNLOCK(ipimap); 2638 IREAD_UNLOCK(ipimap);
2642 rc = -EIO; 2639 rc = -EIO;
2643 goto out; 2640 goto out;
2644 } 2641 }
2645 iagp = (struct iag *) mp->data; 2642 iagp = (struct iag *) mp->data;
2646 2643
2647 /* remove the iag from the iag free list */ 2644 /* remove the iag from the iag free list */
2648 imap->im_freeiag = le32_to_cpu(iagp->iagfree); 2645 imap->im_freeiag = le32_to_cpu(iagp->iagfree);
2649 iagp->iagfree = cpu_to_le32(-1); 2646 iagp->iagfree = cpu_to_le32(-1);
2650 2647
2651 /* set the return iag number and buffer pointer */ 2648 /* set the return iag number and buffer pointer */
2652 *iagnop = iagno; 2649 *iagnop = iagno;
2653 *mpp = mp; 2650 *mpp = mp;
2654 2651
2655 out: 2652 out:
2656 /* release the iag free lock */ 2653 /* release the iag free lock */
2657 IAGFREE_UNLOCK(imap); 2654 IAGFREE_UNLOCK(imap);
2658 2655
2659 return (rc); 2656 return (rc);
2660 } 2657 }
2661 2658
2662 /* 2659 /*
2663 * NAME: diIAGRead() 2660 * NAME: diIAGRead()
2664 * 2661 *
2665 * FUNCTION: get the buffer for the specified iag within a fileset 2662 * FUNCTION: get the buffer for the specified iag within a fileset
2666 * or aggregate inode map. 2663 * or aggregate inode map.
2667 * 2664 *
2668 * PARAMETERS: 2665 * PARAMETERS:
2669 * imap - pointer to inode map control structure. 2666 * imap - pointer to inode map control structure.
2670 * iagno - iag number. 2667 * iagno - iag number.
2671 * bpp - point to buffer pointer to be filled in on successful 2668 * bpp - point to buffer pointer to be filled in on successful
2672 * exit. 2669 * exit.
2673 * 2670 *
2674 * SERIALIZATION: 2671 * SERIALIZATION:
2675 * must have read lock on imap inode 2672 * must have read lock on imap inode
2676 * (When called by diExtendFS, the filesystem is quiesced, therefore 2673 * (When called by diExtendFS, the filesystem is quiesced, therefore
2677 * the read lock is unnecessary.) 2674 * the read lock is unnecessary.)
2678 * 2675 *
2679 * RETURN VALUES: 2676 * RETURN VALUES:
2680 * 0 - success. 2677 * 0 - success.
2681 * -EIO - i/o error. 2678 * -EIO - i/o error.
2682 */ 2679 */
2683 static int diIAGRead(struct inomap * imap, int iagno, struct metapage ** mpp) 2680 static int diIAGRead(struct inomap * imap, int iagno, struct metapage ** mpp)
2684 { 2681 {
2685 struct inode *ipimap = imap->im_ipimap; 2682 struct inode *ipimap = imap->im_ipimap;
2686 s64 blkno; 2683 s64 blkno;
2687 2684
2688 /* compute the logical block number of the iag. */ 2685 /* compute the logical block number of the iag. */
2689 blkno = IAGTOLBLK(iagno, JFS_SBI(ipimap->i_sb)->l2nbperpage); 2686 blkno = IAGTOLBLK(iagno, JFS_SBI(ipimap->i_sb)->l2nbperpage);
2690 2687
2691 /* read the iag. */ 2688 /* read the iag. */
2692 *mpp = read_metapage(ipimap, blkno, PSIZE, 0); 2689 *mpp = read_metapage(ipimap, blkno, PSIZE, 0);
2693 if (*mpp == NULL) { 2690 if (*mpp == NULL) {
2694 return -EIO; 2691 return -EIO;
2695 } 2692 }
2696 2693
2697 return (0); 2694 return (0);
2698 } 2695 }
2699 2696
2700 /* 2697 /*
2701 * NAME: diFindFree() 2698 * NAME: diFindFree()
2702 * 2699 *
2703 * FUNCTION: find the first free bit in a word starting at 2700 * FUNCTION: find the first free bit in a word starting at
2704 * the specified bit position. 2701 * the specified bit position.
2705 * 2702 *
2706 * PARAMETERS: 2703 * PARAMETERS:
2707 * word - word to be examined. 2704 * word - word to be examined.
2708 * start - starting bit position. 2705 * start - starting bit position.
2709 * 2706 *
2710 * RETURN VALUES: 2707 * RETURN VALUES:
2711 * bit position of first free bit in the word or 32 if 2708 * bit position of first free bit in the word or 32 if
2712 * no free bits were found. 2709 * no free bits were found.
2713 */ 2710 */
2714 static int diFindFree(u32 word, int start) 2711 static int diFindFree(u32 word, int start)
2715 { 2712 {
2716 int bitno; 2713 int bitno;
2717 assert(start < 32); 2714 assert(start < 32);
2718 /* scan the word for the first free bit. */ 2715 /* scan the word for the first free bit. */
2719 for (word <<= start, bitno = start; bitno < 32; 2716 for (word <<= start, bitno = start; bitno < 32;
2720 bitno++, word <<= 1) { 2717 bitno++, word <<= 1) {
2721 if ((word & HIGHORDER) == 0) 2718 if ((word & HIGHORDER) == 0)
2722 break; 2719 break;
2723 } 2720 }
2724 return (bitno); 2721 return (bitno);
2725 } 2722 }
2726 2723
2727 /* 2724 /*
2728 * NAME: diUpdatePMap() 2725 * NAME: diUpdatePMap()
2729 * 2726 *
2730 * FUNCTION: Update the persistent map in an IAG for the allocation or 2727 * FUNCTION: Update the persistent map in an IAG for the allocation or
2731 * freeing of the specified inode. 2728 * freeing of the specified inode.
2732 * 2729 *
2733 * PRE CONDITIONS: Working map has already been updated for allocate. 2730 * PRE CONDITIONS: Working map has already been updated for allocate.
2734 * 2731 *
2735 * PARAMETERS: 2732 * PARAMETERS:
2736 * ipimap - Incore inode map inode 2733 * ipimap - Incore inode map inode
2737 * inum - Number of inode to mark in permanent map 2734 * inum - Number of inode to mark in permanent map
2738 * is_free - If 'true' indicates inode should be marked freed, otherwise 2735 * is_free - If 'true' indicates inode should be marked freed, otherwise
2739 * indicates inode should be marked allocated. 2736 * indicates inode should be marked allocated.
2740 * 2737 *
2741 * RETURN VALUES: 2738 * RETURN VALUES:
2742 * 0 for success 2739 * 0 for success
2743 */ 2740 */
2744 int 2741 int
2745 diUpdatePMap(struct inode *ipimap, 2742 diUpdatePMap(struct inode *ipimap,
2746 unsigned long inum, bool is_free, struct tblock * tblk) 2743 unsigned long inum, bool is_free, struct tblock * tblk)
2747 { 2744 {
2748 int rc; 2745 int rc;
2749 struct iag *iagp; 2746 struct iag *iagp;
2750 struct metapage *mp; 2747 struct metapage *mp;
2751 int iagno, ino, extno, bitno; 2748 int iagno, ino, extno, bitno;
2752 struct inomap *imap; 2749 struct inomap *imap;
2753 u32 mask; 2750 u32 mask;
2754 struct jfs_log *log; 2751 struct jfs_log *log;
2755 int lsn, difft, diffp; 2752 int lsn, difft, diffp;
2756 unsigned long flags; 2753 unsigned long flags;
2757 2754
2758 imap = JFS_IP(ipimap)->i_imap; 2755 imap = JFS_IP(ipimap)->i_imap;
2759 /* get the iag number containing the inode */ 2756 /* get the iag number containing the inode */
2760 iagno = INOTOIAG(inum); 2757 iagno = INOTOIAG(inum);
2761 /* make sure that the iag is contained within the map */ 2758 /* make sure that the iag is contained within the map */
2762 if (iagno >= imap->im_nextiag) { 2759 if (iagno >= imap->im_nextiag) {
2763 jfs_error(ipimap->i_sb, 2760 jfs_error(ipimap->i_sb,
2764 "diUpdatePMap: the iag is outside the map"); 2761 "diUpdatePMap: the iag is outside the map");
2765 return -EIO; 2762 return -EIO;
2766 } 2763 }
2767 /* read the iag */ 2764 /* read the iag */
2768 IREAD_LOCK(ipimap, RDWRLOCK_IMAP); 2765 IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
2769 rc = diIAGRead(imap, iagno, &mp); 2766 rc = diIAGRead(imap, iagno, &mp);
2770 IREAD_UNLOCK(ipimap); 2767 IREAD_UNLOCK(ipimap);
2771 if (rc) 2768 if (rc)
2772 return (rc); 2769 return (rc);
2773 metapage_wait_for_io(mp); 2770 metapage_wait_for_io(mp);
2774 iagp = (struct iag *) mp->data; 2771 iagp = (struct iag *) mp->data;
2775 /* get the inode number and extent number of the inode within 2772 /* get the inode number and extent number of the inode within
2776 * the iag and the inode number within the extent. 2773 * the iag and the inode number within the extent.
2777 */ 2774 */
2778 ino = inum & (INOSPERIAG - 1); 2775 ino = inum & (INOSPERIAG - 1);
2779 extno = ino >> L2INOSPEREXT; 2776 extno = ino >> L2INOSPEREXT;
2780 bitno = ino & (INOSPEREXT - 1); 2777 bitno = ino & (INOSPEREXT - 1);
2781 mask = HIGHORDER >> bitno; 2778 mask = HIGHORDER >> bitno;
2782 /* 2779 /*
2783 * mark the inode free in persistent map: 2780 * mark the inode free in persistent map:
2784 */ 2781 */
2785 if (is_free) { 2782 if (is_free) {
2786 /* The inode should have been allocated both in working 2783 /* The inode should have been allocated both in working
2787 * map and in persistent map; 2784 * map and in persistent map;
2788 * the inode will be freed from working map at the release 2785 * the inode will be freed from working map at the release
2789 * of last reference release; 2786 * of last reference release;
2790 */ 2787 */
2791 if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { 2788 if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) {
2792 jfs_error(ipimap->i_sb, 2789 jfs_error(ipimap->i_sb,
2793 "diUpdatePMap: inode %ld not marked as " 2790 "diUpdatePMap: inode %ld not marked as "
2794 "allocated in wmap!", inum); 2791 "allocated in wmap!", inum);
2795 } 2792 }
2796 if (!(le32_to_cpu(iagp->pmap[extno]) & mask)) { 2793 if (!(le32_to_cpu(iagp->pmap[extno]) & mask)) {
2797 jfs_error(ipimap->i_sb, 2794 jfs_error(ipimap->i_sb,
2798 "diUpdatePMap: inode %ld not marked as " 2795 "diUpdatePMap: inode %ld not marked as "
2799 "allocated in pmap!", inum); 2796 "allocated in pmap!", inum);
2800 } 2797 }
2801 /* update the bitmap for the extent of the freed inode */ 2798 /* update the bitmap for the extent of the freed inode */
2802 iagp->pmap[extno] &= cpu_to_le32(~mask); 2799 iagp->pmap[extno] &= cpu_to_le32(~mask);
2803 } 2800 }
2804 /* 2801 /*
2805 * mark the inode allocated in persistent map: 2802 * mark the inode allocated in persistent map:
2806 */ 2803 */
2807 else { 2804 else {
2808 /* The inode should be already allocated in the working map 2805 /* The inode should be already allocated in the working map
2809 * and should be free in persistent map; 2806 * and should be free in persistent map;
2810 */ 2807 */
2811 if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { 2808 if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) {
2812 release_metapage(mp); 2809 release_metapage(mp);
2813 jfs_error(ipimap->i_sb, 2810 jfs_error(ipimap->i_sb,
2814 "diUpdatePMap: the inode is not allocated in " 2811 "diUpdatePMap: the inode is not allocated in "
2815 "the working map"); 2812 "the working map");
2816 return -EIO; 2813 return -EIO;
2817 } 2814 }
2818 if ((le32_to_cpu(iagp->pmap[extno]) & mask) != 0) { 2815 if ((le32_to_cpu(iagp->pmap[extno]) & mask) != 0) {
2819 release_metapage(mp); 2816 release_metapage(mp);
2820 jfs_error(ipimap->i_sb, 2817 jfs_error(ipimap->i_sb,
2821 "diUpdatePMap: the inode is not free in the " 2818 "diUpdatePMap: the inode is not free in the "
2822 "persistent map"); 2819 "persistent map");
2823 return -EIO; 2820 return -EIO;
2824 } 2821 }
2825 /* update the bitmap for the extent of the allocated inode */ 2822 /* update the bitmap for the extent of the allocated inode */
2826 iagp->pmap[extno] |= cpu_to_le32(mask); 2823 iagp->pmap[extno] |= cpu_to_le32(mask);
2827 } 2824 }
2828 /* 2825 /*
2829 * update iag lsn 2826 * update iag lsn
2830 */ 2827 */
2831 lsn = tblk->lsn; 2828 lsn = tblk->lsn;
2832 log = JFS_SBI(tblk->sb)->log; 2829 log = JFS_SBI(tblk->sb)->log;
2833 LOGSYNC_LOCK(log, flags); 2830 LOGSYNC_LOCK(log, flags);
2834 if (mp->lsn != 0) { 2831 if (mp->lsn != 0) {
2835 /* inherit older/smaller lsn */ 2832 /* inherit older/smaller lsn */
2836 logdiff(difft, lsn, log); 2833 logdiff(difft, lsn, log);
2837 logdiff(diffp, mp->lsn, log); 2834 logdiff(diffp, mp->lsn, log);
2838 if (difft < diffp) { 2835 if (difft < diffp) {
2839 mp->lsn = lsn; 2836 mp->lsn = lsn;
2840 /* move mp after tblock in logsync list */ 2837 /* move mp after tblock in logsync list */
2841 list_move(&mp->synclist, &tblk->synclist); 2838 list_move(&mp->synclist, &tblk->synclist);
2842 } 2839 }
2843 /* inherit younger/larger clsn */ 2840 /* inherit younger/larger clsn */
2844 assert(mp->clsn); 2841 assert(mp->clsn);
2845 logdiff(difft, tblk->clsn, log); 2842 logdiff(difft, tblk->clsn, log);
2846 logdiff(diffp, mp->clsn, log); 2843 logdiff(diffp, mp->clsn, log);
2847 if (difft > diffp) 2844 if (difft > diffp)
2848 mp->clsn = tblk->clsn; 2845 mp->clsn = tblk->clsn;
2849 } else { 2846 } else {
2850 mp->log = log; 2847 mp->log = log;
2851 mp->lsn = lsn; 2848 mp->lsn = lsn;
2852 /* insert mp after tblock in logsync list */ 2849 /* insert mp after tblock in logsync list */
2853 log->count++; 2850 log->count++;
2854 list_add(&mp->synclist, &tblk->synclist); 2851 list_add(&mp->synclist, &tblk->synclist);
2855 mp->clsn = tblk->clsn; 2852 mp->clsn = tblk->clsn;
2856 } 2853 }
2857 LOGSYNC_UNLOCK(log, flags); 2854 LOGSYNC_UNLOCK(log, flags);
2858 write_metapage(mp); 2855 write_metapage(mp);
2859 return (0); 2856 return (0);
2860 } 2857 }
2861 2858
2862 /* 2859 /*
2863 * diExtendFS() 2860 * diExtendFS()
2864 * 2861 *
2865 * function: update imap for extendfs(); 2862 * function: update imap for extendfs();
2866 * 2863 *
2867 * note: AG size has been increased s.t. each k old contiguous AGs are 2864 * note: AG size has been increased s.t. each k old contiguous AGs are
2868 * coalesced into a new AG; 2865 * coalesced into a new AG;
2869 */ 2866 */
2870 int diExtendFS(struct inode *ipimap, struct inode *ipbmap) 2867 int diExtendFS(struct inode *ipimap, struct inode *ipbmap)
2871 { 2868 {
2872 int rc, rcx = 0; 2869 int rc, rcx = 0;
2873 struct inomap *imap = JFS_IP(ipimap)->i_imap; 2870 struct inomap *imap = JFS_IP(ipimap)->i_imap;
2874 struct iag *iagp = NULL, *hiagp = NULL; 2871 struct iag *iagp = NULL, *hiagp = NULL;
2875 struct bmap *mp = JFS_SBI(ipbmap->i_sb)->bmap; 2872 struct bmap *mp = JFS_SBI(ipbmap->i_sb)->bmap;
2876 struct metapage *bp, *hbp; 2873 struct metapage *bp, *hbp;
2877 int i, n, head; 2874 int i, n, head;
2878 int numinos, xnuminos = 0, xnumfree = 0; 2875 int numinos, xnuminos = 0, xnumfree = 0;
2879 s64 agstart; 2876 s64 agstart;
2880 2877
2881 jfs_info("diExtendFS: nextiag:%d numinos:%d numfree:%d", 2878 jfs_info("diExtendFS: nextiag:%d numinos:%d numfree:%d",
2882 imap->im_nextiag, atomic_read(&imap->im_numinos), 2879 imap->im_nextiag, atomic_read(&imap->im_numinos),
2883 atomic_read(&imap->im_numfree)); 2880 atomic_read(&imap->im_numfree));
2884 2881
2885 /* 2882 /*
2886 * reconstruct imap 2883 * reconstruct imap
2887 * 2884 *
2888 * coalesce contiguous k (newAGSize/oldAGSize) AGs; 2885 * coalesce contiguous k (newAGSize/oldAGSize) AGs;
2889 * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn; 2886 * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn;
2890 * note: new AG size = old AG size * (2**x). 2887 * note: new AG size = old AG size * (2**x).
2891 */ 2888 */
2892 2889
2893 /* init per AG control information im_agctl[] */ 2890 /* init per AG control information im_agctl[] */
2894 for (i = 0; i < MAXAG; i++) { 2891 for (i = 0; i < MAXAG; i++) {
2895 imap->im_agctl[i].inofree = -1; 2892 imap->im_agctl[i].inofree = -1;
2896 imap->im_agctl[i].extfree = -1; 2893 imap->im_agctl[i].extfree = -1;
2897 imap->im_agctl[i].numinos = 0; /* number of backed inodes */ 2894 imap->im_agctl[i].numinos = 0; /* number of backed inodes */
2898 imap->im_agctl[i].numfree = 0; /* number of free backed inodes */ 2895 imap->im_agctl[i].numfree = 0; /* number of free backed inodes */
2899 } 2896 }
2900 2897
2901 /* 2898 /*
2902 * process each iag page of the map. 2899 * process each iag page of the map.
2903 * 2900 *
2904 * rebuild AG Free Inode List, AG Free Inode Extent List; 2901 * rebuild AG Free Inode List, AG Free Inode Extent List;
2905 */ 2902 */
2906 for (i = 0; i < imap->im_nextiag; i++) { 2903 for (i = 0; i < imap->im_nextiag; i++) {
2907 if ((rc = diIAGRead(imap, i, &bp))) { 2904 if ((rc = diIAGRead(imap, i, &bp))) {
2908 rcx = rc; 2905 rcx = rc;
2909 continue; 2906 continue;
2910 } 2907 }
2911 iagp = (struct iag *) bp->data; 2908 iagp = (struct iag *) bp->data;
2912 if (le32_to_cpu(iagp->iagnum) != i) { 2909 if (le32_to_cpu(iagp->iagnum) != i) {
2913 release_metapage(bp); 2910 release_metapage(bp);
2914 jfs_error(ipimap->i_sb, 2911 jfs_error(ipimap->i_sb,
2915 "diExtendFs: unexpected value of iagnum"); 2912 "diExtendFs: unexpected value of iagnum");
2916 return -EIO; 2913 return -EIO;
2917 } 2914 }
2918 2915
2919 /* leave free iag in the free iag list */ 2916 /* leave free iag in the free iag list */
2920 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { 2917 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
2921 release_metapage(bp); 2918 release_metapage(bp);
2922 continue; 2919 continue;
2923 } 2920 }
2924 2921
2925 /* agstart that computes to the same ag is treated as same; */ 2922 /* agstart that computes to the same ag is treated as same; */
2926 agstart = le64_to_cpu(iagp->agstart); 2923 agstart = le64_to_cpu(iagp->agstart);
2927 /* iagp->agstart = agstart & ~(mp->db_agsize - 1); */ 2924 /* iagp->agstart = agstart & ~(mp->db_agsize - 1); */
2928 n = agstart >> mp->db_agl2size; 2925 n = agstart >> mp->db_agl2size;
2929 2926
2930 /* compute backed inodes */ 2927 /* compute backed inodes */
2931 numinos = (EXTSPERIAG - le32_to_cpu(iagp->nfreeexts)) 2928 numinos = (EXTSPERIAG - le32_to_cpu(iagp->nfreeexts))
2932 << L2INOSPEREXT; 2929 << L2INOSPEREXT;
2933 if (numinos > 0) { 2930 if (numinos > 0) {
2934 /* merge AG backed inodes */ 2931 /* merge AG backed inodes */
2935 imap->im_agctl[n].numinos += numinos; 2932 imap->im_agctl[n].numinos += numinos;
2936 xnuminos += numinos; 2933 xnuminos += numinos;
2937 } 2934 }
2938 2935
2939 /* if any backed free inodes, insert at AG free inode list */ 2936 /* if any backed free inodes, insert at AG free inode list */
2940 if ((int) le32_to_cpu(iagp->nfreeinos) > 0) { 2937 if ((int) le32_to_cpu(iagp->nfreeinos) > 0) {
2941 if ((head = imap->im_agctl[n].inofree) == -1) { 2938 if ((head = imap->im_agctl[n].inofree) == -1) {
2942 iagp->inofreefwd = cpu_to_le32(-1); 2939 iagp->inofreefwd = cpu_to_le32(-1);
2943 iagp->inofreeback = cpu_to_le32(-1); 2940 iagp->inofreeback = cpu_to_le32(-1);
2944 } else { 2941 } else {
2945 if ((rc = diIAGRead(imap, head, &hbp))) { 2942 if ((rc = diIAGRead(imap, head, &hbp))) {
2946 rcx = rc; 2943 rcx = rc;
2947 goto nextiag; 2944 goto nextiag;
2948 } 2945 }
2949 hiagp = (struct iag *) hbp->data; 2946 hiagp = (struct iag *) hbp->data;
2950 hiagp->inofreeback = iagp->iagnum; 2947 hiagp->inofreeback = iagp->iagnum;
2951 iagp->inofreefwd = cpu_to_le32(head); 2948 iagp->inofreefwd = cpu_to_le32(head);
2952 iagp->inofreeback = cpu_to_le32(-1); 2949 iagp->inofreeback = cpu_to_le32(-1);
2953 write_metapage(hbp); 2950 write_metapage(hbp);
2954 } 2951 }
2955 2952
2956 imap->im_agctl[n].inofree = 2953 imap->im_agctl[n].inofree =
2957 le32_to_cpu(iagp->iagnum); 2954 le32_to_cpu(iagp->iagnum);
2958 2955
2959 /* merge AG backed free inodes */ 2956 /* merge AG backed free inodes */
2960 imap->im_agctl[n].numfree += 2957 imap->im_agctl[n].numfree +=
2961 le32_to_cpu(iagp->nfreeinos); 2958 le32_to_cpu(iagp->nfreeinos);
2962 xnumfree += le32_to_cpu(iagp->nfreeinos); 2959 xnumfree += le32_to_cpu(iagp->nfreeinos);
2963 } 2960 }
2964 2961
2965 /* if any free extents, insert at AG free extent list */ 2962 /* if any free extents, insert at AG free extent list */
2966 if (le32_to_cpu(iagp->nfreeexts) > 0) { 2963 if (le32_to_cpu(iagp->nfreeexts) > 0) {
2967 if ((head = imap->im_agctl[n].extfree) == -1) { 2964 if ((head = imap->im_agctl[n].extfree) == -1) {
2968 iagp->extfreefwd = cpu_to_le32(-1); 2965 iagp->extfreefwd = cpu_to_le32(-1);
2969 iagp->extfreeback = cpu_to_le32(-1); 2966 iagp->extfreeback = cpu_to_le32(-1);
2970 } else { 2967 } else {
2971 if ((rc = diIAGRead(imap, head, &hbp))) { 2968 if ((rc = diIAGRead(imap, head, &hbp))) {
2972 rcx = rc; 2969 rcx = rc;
2973 goto nextiag; 2970 goto nextiag;
2974 } 2971 }
2975 hiagp = (struct iag *) hbp->data; 2972 hiagp = (struct iag *) hbp->data;
2976 hiagp->extfreeback = iagp->iagnum; 2973 hiagp->extfreeback = iagp->iagnum;
2977 iagp->extfreefwd = cpu_to_le32(head); 2974 iagp->extfreefwd = cpu_to_le32(head);
2978 iagp->extfreeback = cpu_to_le32(-1); 2975 iagp->extfreeback = cpu_to_le32(-1);
2979 write_metapage(hbp); 2976 write_metapage(hbp);
2980 } 2977 }
2981 2978
2982 imap->im_agctl[n].extfree = 2979 imap->im_agctl[n].extfree =
2983 le32_to_cpu(iagp->iagnum); 2980 le32_to_cpu(iagp->iagnum);
2984 } 2981 }
2985 2982
2986 nextiag: 2983 nextiag:
2987 write_metapage(bp); 2984 write_metapage(bp);
2988 } 2985 }
2989 2986
2990 if (xnuminos != atomic_read(&imap->im_numinos) || 2987 if (xnuminos != atomic_read(&imap->im_numinos) ||
2991 xnumfree != atomic_read(&imap->im_numfree)) { 2988 xnumfree != atomic_read(&imap->im_numfree)) {
2992 jfs_error(ipimap->i_sb, 2989 jfs_error(ipimap->i_sb,
2993 "diExtendFs: numinos or numfree incorrect"); 2990 "diExtendFs: numinos or numfree incorrect");
2994 return -EIO; 2991 return -EIO;
2995 } 2992 }
2996 2993
2997 return rcx; 2994 return rcx;
2998 } 2995 }
2999 2996
3000 2997
3001 /* 2998 /*
3002 * duplicateIXtree() 2999 * duplicateIXtree()
3003 * 3000 *
3004 * serialization: IWRITE_LOCK held on entry/exit 3001 * serialization: IWRITE_LOCK held on entry/exit
3005 * 3002 *
3006 * note: shadow page with regular inode (rel.2); 3003 * note: shadow page with regular inode (rel.2);
3007 */ 3004 */
static void duplicateIXtree(struct super_block *sb, s64 blkno,
			    int xlen, s64 *xaddr)
{
	struct jfs_superblock *j_sb;
	struct buffer_head *bh;
	struct inode *ip;
	tid_t tid;

	/* if AIT2 ipmap2 is bad, do not try to update it */
	if (JFS_SBI(sb)->mntflag & JFS_BAD_SAIT)	/* s_flag */
		return;
	ip = diReadSpecial(sb, FILESYSTEM_I, 1);
	if (ip == NULL) {
		/* secondary table unreadable: mark JFS_BAD_SAIT in the
		 * in-memory mount flags and persist it in the on-disk
		 * superblock so future mounts skip AIT2 as well
		 */
		JFS_SBI(sb)->mntflag |= JFS_BAD_SAIT;
		if (readSuper(sb, &bh))
			return;
		j_sb = (struct jfs_superblock *)bh->b_data;
		j_sb->s_flag |= cpu_to_le32(JFS_BAD_SAIT);

		mark_buffer_dirty(bh);
		sync_dirty_buffer(bh);	/* flush flag to disk synchronously */
		brelse(bh);
		return;
	}

	/* start transaction */
	tid = txBegin(sb, COMMIT_FORCE);
	/* update the inode map addressing structure to point to it */
	if (xtInsert(tid, ip, 0, blkno, xlen, xaddr, 0)) {
		/* insert failed: give up on AIT2 and abort the tx */
		JFS_SBI(sb)->mntflag |= JFS_BAD_SAIT;
		txAbort(tid, 1);
		goto cleanup;

	}
	/* update the inode map's inode to reflect the extension */
	ip->i_size += PSIZE;
	inode_add_bytes(ip, PSIZE);
	txCommit(tid, 1, &ip, COMMIT_FORCE);
      cleanup:
	/* txEnd() is required on both the commit and abort paths */
	txEnd(tid);
	diFreeSpecial(ip);
}
3050 3047
3051 /* 3048 /*
3052 * NAME: copy_from_dinode() 3049 * NAME: copy_from_dinode()
3053 * 3050 *
3054 * FUNCTION: Copies inode info from disk inode to in-memory inode 3051 * FUNCTION: Copies inode info from disk inode to in-memory inode
3055 * 3052 *
3056 * RETURN VALUES: 3053 * RETURN VALUES:
3057 * 0 - success 3054 * 0 - success
3058 * -ENOMEM - insufficient memory 3055 * -ENOMEM - insufficient memory
3059 */ 3056 */
3060 static int copy_from_dinode(struct dinode * dip, struct inode *ip) 3057 static int copy_from_dinode(struct dinode * dip, struct inode *ip)
3061 { 3058 {
3062 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 3059 struct jfs_inode_info *jfs_ip = JFS_IP(ip);
3063 struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); 3060 struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
3064 3061
3065 jfs_ip->fileset = le32_to_cpu(dip->di_fileset); 3062 jfs_ip->fileset = le32_to_cpu(dip->di_fileset);
3066 jfs_ip->mode2 = le32_to_cpu(dip->di_mode); 3063 jfs_ip->mode2 = le32_to_cpu(dip->di_mode);
3067 jfs_set_inode_flags(ip); 3064 jfs_set_inode_flags(ip);
3068 3065
3069 ip->i_mode = le32_to_cpu(dip->di_mode) & 0xffff; 3066 ip->i_mode = le32_to_cpu(dip->di_mode) & 0xffff;
3070 if (sbi->umask != -1) { 3067 if (sbi->umask != -1) {
3071 ip->i_mode = (ip->i_mode & ~0777) | (0777 & ~sbi->umask); 3068 ip->i_mode = (ip->i_mode & ~0777) | (0777 & ~sbi->umask);
3072 /* For directories, add x permission if r is allowed by umask */ 3069 /* For directories, add x permission if r is allowed by umask */
3073 if (S_ISDIR(ip->i_mode)) { 3070 if (S_ISDIR(ip->i_mode)) {
3074 if (ip->i_mode & 0400) 3071 if (ip->i_mode & 0400)
3075 ip->i_mode |= 0100; 3072 ip->i_mode |= 0100;
3076 if (ip->i_mode & 0040) 3073 if (ip->i_mode & 0040)
3077 ip->i_mode |= 0010; 3074 ip->i_mode |= 0010;
3078 if (ip->i_mode & 0004) 3075 if (ip->i_mode & 0004)
3079 ip->i_mode |= 0001; 3076 ip->i_mode |= 0001;
3080 } 3077 }
3081 } 3078 }
3082 ip->i_nlink = le32_to_cpu(dip->di_nlink); 3079 ip->i_nlink = le32_to_cpu(dip->di_nlink);
3083 3080
3084 jfs_ip->saved_uid = le32_to_cpu(dip->di_uid); 3081 jfs_ip->saved_uid = le32_to_cpu(dip->di_uid);
3085 if (sbi->uid == -1) 3082 if (sbi->uid == -1)
3086 ip->i_uid = jfs_ip->saved_uid; 3083 ip->i_uid = jfs_ip->saved_uid;
3087 else { 3084 else {
3088 ip->i_uid = sbi->uid; 3085 ip->i_uid = sbi->uid;
3089 } 3086 }
3090 3087
3091 jfs_ip->saved_gid = le32_to_cpu(dip->di_gid); 3088 jfs_ip->saved_gid = le32_to_cpu(dip->di_gid);
3092 if (sbi->gid == -1) 3089 if (sbi->gid == -1)
3093 ip->i_gid = jfs_ip->saved_gid; 3090 ip->i_gid = jfs_ip->saved_gid;
3094 else { 3091 else {
3095 ip->i_gid = sbi->gid; 3092 ip->i_gid = sbi->gid;
3096 } 3093 }
3097 3094
3098 ip->i_size = le64_to_cpu(dip->di_size); 3095 ip->i_size = le64_to_cpu(dip->di_size);
3099 ip->i_atime.tv_sec = le32_to_cpu(dip->di_atime.tv_sec); 3096 ip->i_atime.tv_sec = le32_to_cpu(dip->di_atime.tv_sec);
3100 ip->i_atime.tv_nsec = le32_to_cpu(dip->di_atime.tv_nsec); 3097 ip->i_atime.tv_nsec = le32_to_cpu(dip->di_atime.tv_nsec);
3101 ip->i_mtime.tv_sec = le32_to_cpu(dip->di_mtime.tv_sec); 3098 ip->i_mtime.tv_sec = le32_to_cpu(dip->di_mtime.tv_sec);
3102 ip->i_mtime.tv_nsec = le32_to_cpu(dip->di_mtime.tv_nsec); 3099 ip->i_mtime.tv_nsec = le32_to_cpu(dip->di_mtime.tv_nsec);
3103 ip->i_ctime.tv_sec = le32_to_cpu(dip->di_ctime.tv_sec); 3100 ip->i_ctime.tv_sec = le32_to_cpu(dip->di_ctime.tv_sec);
3104 ip->i_ctime.tv_nsec = le32_to_cpu(dip->di_ctime.tv_nsec); 3101 ip->i_ctime.tv_nsec = le32_to_cpu(dip->di_ctime.tv_nsec);
3105 ip->i_blocks = LBLK2PBLK(ip->i_sb, le64_to_cpu(dip->di_nblocks)); 3102 ip->i_blocks = LBLK2PBLK(ip->i_sb, le64_to_cpu(dip->di_nblocks));
3106 ip->i_generation = le32_to_cpu(dip->di_gen); 3103 ip->i_generation = le32_to_cpu(dip->di_gen);
3107 3104
3108 jfs_ip->ixpxd = dip->di_ixpxd; /* in-memory pxd's are little-endian */ 3105 jfs_ip->ixpxd = dip->di_ixpxd; /* in-memory pxd's are little-endian */
3109 jfs_ip->acl = dip->di_acl; /* as are dxd's */ 3106 jfs_ip->acl = dip->di_acl; /* as are dxd's */
3110 jfs_ip->ea = dip->di_ea; 3107 jfs_ip->ea = dip->di_ea;
3111 jfs_ip->next_index = le32_to_cpu(dip->di_next_index); 3108 jfs_ip->next_index = le32_to_cpu(dip->di_next_index);
3112 jfs_ip->otime = le32_to_cpu(dip->di_otime.tv_sec); 3109 jfs_ip->otime = le32_to_cpu(dip->di_otime.tv_sec);
3113 jfs_ip->acltype = le32_to_cpu(dip->di_acltype); 3110 jfs_ip->acltype = le32_to_cpu(dip->di_acltype);
3114 3111
3115 if (S_ISCHR(ip->i_mode) || S_ISBLK(ip->i_mode)) { 3112 if (S_ISCHR(ip->i_mode) || S_ISBLK(ip->i_mode)) {
3116 jfs_ip->dev = le32_to_cpu(dip->di_rdev); 3113 jfs_ip->dev = le32_to_cpu(dip->di_rdev);
3117 ip->i_rdev = new_decode_dev(jfs_ip->dev); 3114 ip->i_rdev = new_decode_dev(jfs_ip->dev);
3118 } 3115 }
3119 3116
3120 if (S_ISDIR(ip->i_mode)) { 3117 if (S_ISDIR(ip->i_mode)) {
3121 memcpy(&jfs_ip->i_dirtable, &dip->di_dirtable, 384); 3118 memcpy(&jfs_ip->i_dirtable, &dip->di_dirtable, 384);
3122 } else if (S_ISREG(ip->i_mode) || S_ISLNK(ip->i_mode)) { 3119 } else if (S_ISREG(ip->i_mode) || S_ISLNK(ip->i_mode)) {
3123 memcpy(&jfs_ip->i_xtroot, &dip->di_xtroot, 288); 3120 memcpy(&jfs_ip->i_xtroot, &dip->di_xtroot, 288);
3124 } else 3121 } else
3125 memcpy(&jfs_ip->i_inline_ea, &dip->di_inlineea, 128); 3122 memcpy(&jfs_ip->i_inline_ea, &dip->di_inlineea, 128);
3126 3123
3127 /* Zero the in-memory-only stuff */ 3124 /* Zero the in-memory-only stuff */
3128 jfs_ip->cflag = 0; 3125 jfs_ip->cflag = 0;
3129 jfs_ip->btindex = 0; 3126 jfs_ip->btindex = 0;
3130 jfs_ip->btorder = 0; 3127 jfs_ip->btorder = 0;
3131 jfs_ip->bxflag = 0; 3128 jfs_ip->bxflag = 0;
3132 jfs_ip->blid = 0; 3129 jfs_ip->blid = 0;
3133 jfs_ip->atlhead = 0; 3130 jfs_ip->atlhead = 0;
3134 jfs_ip->atltail = 0; 3131 jfs_ip->atltail = 0;
3135 jfs_ip->xtlid = 0; 3132 jfs_ip->xtlid = 0;
3136 return (0); 3133 return (0);
3137 } 3134 }
3138 3135
3139 /* 3136 /*
3140 * NAME: copy_to_dinode() 3137 * NAME: copy_to_dinode()
3141 * 3138 *
3142 * FUNCTION: Copies inode info from in-memory inode to disk inode 3139 * FUNCTION: Copies inode info from in-memory inode to disk inode
3143 */ 3140 */
3144 static void copy_to_dinode(struct dinode * dip, struct inode *ip) 3141 static void copy_to_dinode(struct dinode * dip, struct inode *ip)
3145 { 3142 {
3146 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 3143 struct jfs_inode_info *jfs_ip = JFS_IP(ip);
3147 struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); 3144 struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
3148 3145
3149 dip->di_fileset = cpu_to_le32(jfs_ip->fileset); 3146 dip->di_fileset = cpu_to_le32(jfs_ip->fileset);
3150 dip->di_inostamp = cpu_to_le32(sbi->inostamp); 3147 dip->di_inostamp = cpu_to_le32(sbi->inostamp);
3151 dip->di_number = cpu_to_le32(ip->i_ino); 3148 dip->di_number = cpu_to_le32(ip->i_ino);
3152 dip->di_gen = cpu_to_le32(ip->i_generation); 3149 dip->di_gen = cpu_to_le32(ip->i_generation);
3153 dip->di_size = cpu_to_le64(ip->i_size); 3150 dip->di_size = cpu_to_le64(ip->i_size);
3154 dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks)); 3151 dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks));
3155 dip->di_nlink = cpu_to_le32(ip->i_nlink); 3152 dip->di_nlink = cpu_to_le32(ip->i_nlink);
3156 if (sbi->uid == -1) 3153 if (sbi->uid == -1)
3157 dip->di_uid = cpu_to_le32(ip->i_uid); 3154 dip->di_uid = cpu_to_le32(ip->i_uid);
3158 else 3155 else
3159 dip->di_uid = cpu_to_le32(jfs_ip->saved_uid); 3156 dip->di_uid = cpu_to_le32(jfs_ip->saved_uid);
3160 if (sbi->gid == -1) 3157 if (sbi->gid == -1)
3161 dip->di_gid = cpu_to_le32(ip->i_gid); 3158 dip->di_gid = cpu_to_le32(ip->i_gid);
3162 else 3159 else
3163 dip->di_gid = cpu_to_le32(jfs_ip->saved_gid); 3160 dip->di_gid = cpu_to_le32(jfs_ip->saved_gid);
3164 jfs_get_inode_flags(jfs_ip); 3161 jfs_get_inode_flags(jfs_ip);
3165 /* 3162 /*
3166 * mode2 is only needed for storing the higher order bits. 3163 * mode2 is only needed for storing the higher order bits.
3167 * Trust i_mode for the lower order ones 3164 * Trust i_mode for the lower order ones
3168 */ 3165 */
3169 if (sbi->umask == -1) 3166 if (sbi->umask == -1)
3170 dip->di_mode = cpu_to_le32((jfs_ip->mode2 & 0xffff0000) | 3167 dip->di_mode = cpu_to_le32((jfs_ip->mode2 & 0xffff0000) |
3171 ip->i_mode); 3168 ip->i_mode);
3172 else /* Leave the original permissions alone */ 3169 else /* Leave the original permissions alone */
3173 dip->di_mode = cpu_to_le32(jfs_ip->mode2); 3170 dip->di_mode = cpu_to_le32(jfs_ip->mode2);
3174 3171
3175 dip->di_atime.tv_sec = cpu_to_le32(ip->i_atime.tv_sec); 3172 dip->di_atime.tv_sec = cpu_to_le32(ip->i_atime.tv_sec);
3176 dip->di_atime.tv_nsec = cpu_to_le32(ip->i_atime.tv_nsec); 3173 dip->di_atime.tv_nsec = cpu_to_le32(ip->i_atime.tv_nsec);
3177 dip->di_ctime.tv_sec = cpu_to_le32(ip->i_ctime.tv_sec); 3174 dip->di_ctime.tv_sec = cpu_to_le32(ip->i_ctime.tv_sec);
3178 dip->di_ctime.tv_nsec = cpu_to_le32(ip->i_ctime.tv_nsec); 3175 dip->di_ctime.tv_nsec = cpu_to_le32(ip->i_ctime.tv_nsec);
3179 dip->di_mtime.tv_sec = cpu_to_le32(ip->i_mtime.tv_sec); 3176 dip->di_mtime.tv_sec = cpu_to_le32(ip->i_mtime.tv_sec);
3180 dip->di_mtime.tv_nsec = cpu_to_le32(ip->i_mtime.tv_nsec); 3177 dip->di_mtime.tv_nsec = cpu_to_le32(ip->i_mtime.tv_nsec);
3181 dip->di_ixpxd = jfs_ip->ixpxd; /* in-memory pxd's are little-endian */ 3178 dip->di_ixpxd = jfs_ip->ixpxd; /* in-memory pxd's are little-endian */
3182 dip->di_acl = jfs_ip->acl; /* as are dxd's */ 3179 dip->di_acl = jfs_ip->acl; /* as are dxd's */
3183 dip->di_ea = jfs_ip->ea; 3180 dip->di_ea = jfs_ip->ea;
3184 dip->di_next_index = cpu_to_le32(jfs_ip->next_index); 3181 dip->di_next_index = cpu_to_le32(jfs_ip->next_index);
3185 dip->di_otime.tv_sec = cpu_to_le32(jfs_ip->otime); 3182 dip->di_otime.tv_sec = cpu_to_le32(jfs_ip->otime);
3186 dip->di_otime.tv_nsec = 0; 3183 dip->di_otime.tv_nsec = 0;
3187 dip->di_acltype = cpu_to_le32(jfs_ip->acltype); 3184 dip->di_acltype = cpu_to_le32(jfs_ip->acltype);
3188 if (S_ISCHR(ip->i_mode) || S_ISBLK(ip->i_mode)) 3185 if (S_ISCHR(ip->i_mode) || S_ISBLK(ip->i_mode))
3189 dip->di_rdev = cpu_to_le32(jfs_ip->dev); 3186 dip->di_rdev = cpu_to_le32(jfs_ip->dev);
3190 } 3187 }
3191 3188
1 /* 1 /*
2 * Copyright (C) International Business Machines Corp., 2000-2005 2 * Copyright (C) International Business Machines Corp., 2000-2005
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or 6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version. 7 * (at your option) any later version.
8 * 8 *
9 * This program is distributed in the hope that it will be useful, 9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
12 * the GNU General Public License for more details. 12 * the GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */ 17 */
18 /* 18 /*
19 * jfs_xtree.c: extent allocation descriptor B+-tree manager 19 * jfs_xtree.c: extent allocation descriptor B+-tree manager
20 */ 20 */
21 21
22 #include <linux/fs.h> 22 #include <linux/fs.h>
23 #include <linux/quotaops.h> 23 #include <linux/quotaops.h>
24 #include "jfs_incore.h" 24 #include "jfs_incore.h"
25 #include "jfs_filsys.h" 25 #include "jfs_filsys.h"
26 #include "jfs_metapage.h" 26 #include "jfs_metapage.h"
27 #include "jfs_dmap.h" 27 #include "jfs_dmap.h"
28 #include "jfs_dinode.h" 28 #include "jfs_dinode.h"
29 #include "jfs_superblock.h" 29 #include "jfs_superblock.h"
30 #include "jfs_debug.h" 30 #include "jfs_debug.h"
31 31
32 /* 32 /*
33 * xtree local flag 33 * xtree local flag
34 */ 34 */
35 #define XT_INSERT 0x00000001 35 #define XT_INSERT 0x00000001
36 36
37 /* 37 /*
38 * xtree key/entry comparison: extent offset 38 * xtree key/entry comparison: extent offset
39 * 39 *
40 * return: 40 * return:
41 * -1: k < start of extent 41 * -1: k < start of extent
42 * 0: start_of_extent <= k <= end_of_extent 42 * 0: start_of_extent <= k <= end_of_extent
43 * 1: k > end_of_extent 43 * 1: k > end_of_extent
44 */ 44 */
45 #define XT_CMP(CMP, K, X, OFFSET64)\ 45 #define XT_CMP(CMP, K, X, OFFSET64)\
46 {\ 46 {\
47 OFFSET64 = offsetXAD(X);\ 47 OFFSET64 = offsetXAD(X);\
48 (CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\ 48 (CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\
49 ((K) < OFFSET64) ? -1 : 0;\ 49 ((K) < OFFSET64) ? -1 : 0;\
50 } 50 }
51 51
52 /* write a xad entry */ 52 /* write a xad entry */
53 #define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\ 53 #define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\
54 {\ 54 {\
55 (XAD)->flag = (FLAG);\ 55 (XAD)->flag = (FLAG);\
56 XADoffset((XAD), (OFF));\ 56 XADoffset((XAD), (OFF));\
57 XADlength((XAD), (LEN));\ 57 XADlength((XAD), (LEN));\
58 XADaddress((XAD), (ADDR));\ 58 XADaddress((XAD), (ADDR));\
59 } 59 }
60 60
61 #define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot) 61 #define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot)
62 62
63 /* get page buffer for specified block address */ 63 /* get page buffer for specified block address */
64 /* ToDo: Replace this ugly macro with a function */ 64 /* ToDo: Replace this ugly macro with a function */
65 #define XT_GETPAGE(IP, BN, MP, SIZE, P, RC)\ 65 #define XT_GETPAGE(IP, BN, MP, SIZE, P, RC)\
66 {\ 66 {\
67 BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot)\ 67 BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot)\
68 if (!(RC))\ 68 if (!(RC))\
69 {\ 69 {\
70 if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) ||\ 70 if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) ||\
71 (le16_to_cpu((P)->header.nextindex) > le16_to_cpu((P)->header.maxentry)) ||\ 71 (le16_to_cpu((P)->header.nextindex) > le16_to_cpu((P)->header.maxentry)) ||\
72 (le16_to_cpu((P)->header.maxentry) > (((BN)==0)?XTROOTMAXSLOT:PSIZE>>L2XTSLOTSIZE)))\ 72 (le16_to_cpu((P)->header.maxentry) > (((BN)==0)?XTROOTMAXSLOT:PSIZE>>L2XTSLOTSIZE)))\
73 {\ 73 {\
74 jfs_error((IP)->i_sb, "XT_GETPAGE: xtree page corrupt");\ 74 jfs_error((IP)->i_sb, "XT_GETPAGE: xtree page corrupt");\
75 BT_PUTPAGE(MP);\ 75 BT_PUTPAGE(MP);\
76 MP = NULL;\ 76 MP = NULL;\
77 RC = -EIO;\ 77 RC = -EIO;\
78 }\ 78 }\
79 }\ 79 }\
80 } 80 }
81 81
82 /* for consistency */ 82 /* for consistency */
83 #define XT_PUTPAGE(MP) BT_PUTPAGE(MP) 83 #define XT_PUTPAGE(MP) BT_PUTPAGE(MP)
84 84
85 #define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \ 85 #define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \
86 BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot) 86 BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot)
87 /* xtree entry parameter descriptor */ 87 /* xtree entry parameter descriptor */
88 struct xtsplit { 88 struct xtsplit {
89 struct metapage *mp; 89 struct metapage *mp;
90 s16 index; 90 s16 index;
91 u8 flag; 91 u8 flag;
92 s64 off; 92 s64 off;
93 s64 addr; 93 s64 addr;
94 int len; 94 int len;
95 struct pxdlist *pxdlist; 95 struct pxdlist *pxdlist;
96 }; 96 };
97 97
98 98
99 /* 99 /*
100 * statistics 100 * statistics
101 */ 101 */
102 #ifdef CONFIG_JFS_STATISTICS 102 #ifdef CONFIG_JFS_STATISTICS
103 static struct { 103 static struct {
104 uint search; 104 uint search;
105 uint fastSearch; 105 uint fastSearch;
106 uint split; 106 uint split;
107 } xtStat; 107 } xtStat;
108 #endif 108 #endif
109 109
110 110
111 /* 111 /*
112 * forward references 112 * forward references
113 */ 113 */
114 static int xtSearch(struct inode *ip, s64 xoff, s64 *next, int *cmpp, 114 static int xtSearch(struct inode *ip, s64 xoff, s64 *next, int *cmpp,
115 struct btstack * btstack, int flag); 115 struct btstack * btstack, int flag);
116 116
117 static int xtSplitUp(tid_t tid, 117 static int xtSplitUp(tid_t tid,
118 struct inode *ip, 118 struct inode *ip,
119 struct xtsplit * split, struct btstack * btstack); 119 struct xtsplit * split, struct btstack * btstack);
120 120
121 static int xtSplitPage(tid_t tid, struct inode *ip, struct xtsplit * split, 121 static int xtSplitPage(tid_t tid, struct inode *ip, struct xtsplit * split,
122 struct metapage ** rmpp, s64 * rbnp); 122 struct metapage ** rmpp, s64 * rbnp);
123 123
124 static int xtSplitRoot(tid_t tid, struct inode *ip, 124 static int xtSplitRoot(tid_t tid, struct inode *ip,
125 struct xtsplit * split, struct metapage ** rmpp); 125 struct xtsplit * split, struct metapage ** rmpp);
126 126
127 #ifdef _STILL_TO_PORT 127 #ifdef _STILL_TO_PORT
128 static int xtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp, 128 static int xtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp,
129 xtpage_t * fp, struct btstack * btstack); 129 xtpage_t * fp, struct btstack * btstack);
130 130
131 static int xtSearchNode(struct inode *ip, 131 static int xtSearchNode(struct inode *ip,
132 xad_t * xad, 132 xad_t * xad,
133 int *cmpp, struct btstack * btstack, int flag); 133 int *cmpp, struct btstack * btstack, int flag);
134 134
135 static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * fp); 135 static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * fp);
136 #endif /* _STILL_TO_PORT */ 136 #endif /* _STILL_TO_PORT */
137 137
138 /* 138 /*
139 * xtLookup() 139 * xtLookup()
140 * 140 *
141 * function: map a single page into a physical extent; 141 * function: map a single page into a physical extent;
142 */ 142 */
/* map a single page at lstart into a physical extent; *pflag/*paddr/*plen
 * receive the xad flag, physical address and covered length; no_check
 * skips the beyond-eof sanity test
 */
int xtLookup(struct inode *ip, s64 lstart,
	     s64 llen, int *pflag, s64 * paddr, s32 * plen, int no_check)
{
	int rc = 0;
	struct btstack btstack;	/* xtSearch traverse stack */
	int cmp;
	s64 bn;
	struct metapage *mp;
	xtpage_t *p;
	int index;
	xad_t *xad;
	s64 next, size, xoff, xend;
	int xlen;
	s64 xaddr;

	/* default: no mapping found, full requested length */
	*paddr = 0;
	*plen = llen;

	if (!no_check) {
		/* is lookup offset beyond eof ? */
		size = ((u64) ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
		    JFS_SBI(ip->i_sb)->l2bsize;
		if (lstart >= size) {
			jfs_err("xtLookup: lstart (0x%lx) >= size (0x%lx)",
				(ulong) lstart, (ulong) size);
			return 0;
		}
	}

	/*
	 * search for the xad entry covering the logical extent
	 */
//search:
	if ((rc = xtSearch(ip, lstart, &next, &cmp, &btstack, 0))) {
		jfs_err("xtLookup: xtSearch returned %d", rc);
		return rc;
	}

	/*
	 * compute the physical extent covering logical extent
	 *
	 * N.B. search may have failed (e.g., hole in sparse file),
	 * and returned the index of the next entry.
	 */
	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	/* is xad found covering start of logical extent ?
	 * lstart is a page start address,
	 * i.e., lstart cannot start in a hole;
	 */
	if (cmp) {
		/* hole: clamp length to start of the next extent, if any */
		if (next)
			*plen = min(next - lstart, llen);
		goto out;
	}

	/*
	 * lxd covered by xad
	 */
	xad = &p->xad[index];
	xoff = offsetXAD(xad);
	xlen = lengthXAD(xad);
	xend = xoff + xlen;
	xaddr = addressXAD(xad);

	/* initialize new pxd */
	*pflag = xad->flag;
	*paddr = xaddr + (lstart - xoff);
	/* a page must be fully covered by an xad */
	*plen = min(xend - lstart, llen);

      out:
	XT_PUTPAGE(mp);

	return rc;
}
220 220
221 221
/*
 *	xtLookupList()
 *
 * function: map a single logical extent into a list of physical extent;
 *
 * parameter:
 *	struct inode	*ip,
 *	struct lxdlist	*lxdlist,	lxd list (in)
 *	struct xadlist	*xadlist,	xad list (in/out)
 *	int		flag)
 *
 * NOTE(review): <flag> is never read in the body below — presumably a
 * reserved/compat parameter; confirm against callers before removing.
 *
 * coverage of lxd by xad under assumption of
 * . lxd's are ordered and disjoint.
 * . xad's are ordered and disjoint.
 *
 * return:
 *	0:	success
 *	or a negative errno propagated from xtSearch()/XT_GETPAGE().
 *
 * note: a page being written (even a single byte) is backed fully,
 *	except the last page which is only backed with blocks
 *	required to cover the last byte;
 *	the extent backing a page is fully contained within an xad;
 *
 * The routine is a goto-driven merge of the two sorted lists: labels
 * search/compare/compare1/compare2/cover advance whichever cursor
 * (lxd or xad) is behind, emitting one output pxd per overlap.
 */
int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
		 struct xadlist * xadlist, int flag)
{
	int rc = 0;
	struct btstack btstack;
	int cmp;
	s64 bn;
	struct metapage *mp;
	xtpage_t *p;
	int index;
	lxd_t *lxd;
	xad_t *xad, *pxd;
	s64 size, lstart, lend, xstart, xend, pstart;
	s64 llen, xlen, plen;
	s64 xaddr, paddr;
	int nlxd, npxd, maxnpxd;

	/* output cursor: start with an empty xad list */
	npxd = xadlist->nxad = 0;
	maxnpxd = xadlist->maxnxad;
	pxd = xadlist->xad;

	/* input cursor: first logical extent descriptor */
	nlxd = lxdlist->nlxd;
	lxd = lxdlist->lxd;

	lstart = offsetLXD(lxd);
	llen = lengthLXD(lxd);
	lend = lstart + llen;

	/* file size in filesystem blocks (rounded up); any lxd starting
	 * at or beyond this is past EOF and terminates the mapping.
	 */
	size = (ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
	    JFS_SBI(ip->i_sb)->l2bsize;

	/*
	 * search for the xad entry covering the logical extent
	 * (re-entered whenever the linear scan runs off a leaf page)
	 */
      search:
	if (lstart >= size)
		return 0;

	if ((rc = xtSearch(ip, lstart, NULL, &cmp, &btstack, 0)))
		return rc;

	/*
	 * compute the physical extent covering logical extent
	 *
	 * N.B. search may have failed (e.g., hole in sparse file),
	 * and returned the index of the next entry.
	 */
//map:
	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	/* is xad on the next sibling page ? */
	if (index == le16_to_cpu(p->header.nextindex)) {
		if (p->header.flag & BT_ROOT)
			goto mapend;

		if ((bn = le64_to_cpu(p->header.next)) == 0)
			goto mapend;

		XT_PUTPAGE(mp);

		/* get next sibling page */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;

		index = XTENTRYSTART;
	}

	xad = &p->xad[index];

	/*
	 * is lxd covered by xad ?
	 */
      compare:
	/* refresh the xad-side window from the current entry */
	xstart = offsetXAD(xad);
	xlen = lengthXAD(xad);
	xend = xstart + xlen;
	xaddr = addressXAD(xad);

      compare1:
	if (xstart < lstart)
		goto compare2;

	/* (lstart <= xstart) */

	/* lxd is NOT covered by xad */
	if (lend <= xstart) {
		/*
		 * get next lxd
		 */
		if (--nlxd == 0)
			goto mapend;
		lxd++;

		lstart = offsetLXD(lxd);
		llen = lengthLXD(lxd);
		lend = lstart + llen;
		if (lstart >= size)
			goto mapend;

		/* compare with the current xad */
		goto compare1;
	}
	/* lxd is covered by xad */
	else {			/* (xstart < lend) */

		/* initialize new pxd: overlap begins at the xad start */
		pstart = xstart;
		plen = min(lend - xstart, xlen);
		paddr = xaddr;

		goto cover;
	}

	/* (xstart < lstart) */
      compare2:
	/* lxd is covered by xad */
	if (lstart < xend) {
		/* initialize new pxd: overlap begins inside the xad,
		 * so offset the physical address by (lstart - xstart)
		 */
		pstart = lstart;
		plen = min(xend - lstart, llen);
		paddr = xaddr + (lstart - xstart);

		goto cover;
	}
	/* lxd is NOT covered by xad */
	else {			/* (xend <= lstart) */

		/*
		 * get next xad
		 *
		 * linear search next xad covering lxd on
		 * the current xad page, and then tree search
		 */
		if (index == le16_to_cpu(p->header.nextindex) - 1) {
			if (p->header.flag & BT_ROOT)
				goto mapend;

			/* end of leaf: unpin and restart via tree search */
			XT_PUTPAGE(mp);
			goto search;
		} else {
			index++;
			xad++;

			/* compare with new xad */
			goto compare;
		}
	}

	/*
	 * lxd is covered by xad and a new pxd has been initialized
	 * (lstart <= xstart < lend) or (xstart < lstart < xend)
	 */
      cover:
	/* finalize pxd corresponding to current xad */
	XT_PUTENTRY(pxd, xad->flag, pstart, plen, paddr);

	/* stop once the caller-provided output array is full */
	if (++npxd >= maxnpxd)
		goto mapend;
	pxd++;

	/*
	 * lxd is fully covered by xad
	 */
	if (lend <= xend) {
		/*
		 * get next lxd
		 */
		if (--nlxd == 0)
			goto mapend;
		lxd++;

		lstart = offsetLXD(lxd);
		llen = lengthLXD(lxd);
		lend = lstart + llen;
		if (lstart >= size)
			goto mapend;

		/*
		 * test for old xad covering new lxd
		 * (old xstart < new lstart)
		 */
		goto compare2;
	}
	/*
	 * lxd is partially covered by xad
	 */
	else {			/* (xend < lend) */

		/*
		 * get next xad
		 *
		 * linear search next xad covering lxd on
		 * the current xad page, and then next xad page search
		 */
		if (index == le16_to_cpu(p->header.nextindex) - 1) {
			if (p->header.flag & BT_ROOT)
				goto mapend;

			if ((bn = le64_to_cpu(p->header.next)) == 0)
				goto mapend;

			XT_PUTPAGE(mp);

			/* get next sibling page */
			XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
			if (rc)
				return rc;

			index = XTENTRYSTART;
			xad = &p->xad[index];
		} else {
			index++;
			xad++;
		}

		/*
		 * test for new xad covering old lxd
		 * (old lstart < new xstart)
		 */
		goto compare;
	}

      mapend:
	/* publish how many output entries were actually produced */
	xadlist->nxad = npxd;

//out:
	XT_PUTPAGE(mp);

	return rc;
}
477 477
478 478
/*
 *	xtSearch()
 *
 * function:	search for the xad entry covering specified offset.
 *
 * parameters:
 *	ip	- file object;
 *	xoff	- extent offset;
 *	nextp	- address of next extent (if any) for search miss
 *		  (may be NULL if the caller does not need it);
 *	cmpp	- comparison result: 0 = exact hit, non-zero = miss;
 *	btstack	- traverse stack;
 *	flag	- search process flag (XT_INSERT);
 *
 * returns:
 *	btstack contains (bn, index) of search path traversed to the entry.
 *	*cmpp is set to result of comparison with the entry returned.
 *	the page containing the entry is pinned at exit — the caller is
 *	responsible for XT_PUTPAGE() on it.
 *
 * Two strategies per page: a sequential-access fast path (probe the
 * previously-hit leaf index and its successor), falling back to binary
 * search.  When XT_INSERT is set, btstack->nsplit tracks how many
 * consecutive full pages would need splitting along the path.
 */
static int xtSearch(struct inode *ip, s64 xoff,	s64 *nextp,
		    int *cmpp, struct btstack * btstack, int flag)
{
	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
	int rc = 0;
	int cmp = 1;		/* init for empty page */
	s64 bn;			/* block number */
	struct metapage *mp;	/* page buffer */
	xtpage_t *p;		/* page */
	xad_t *xad;
	int base, index, lim, btindex;
	struct btframe *btsp;
	int nsplit = 0;		/* number of pages to split */
	s64 t64;
	s64 next = 0;		/* offset of next entry on a miss; 0 = none */

	INCREMENT(xtStat.search);

	BT_CLR(btstack);

	btstack->nsplit = 0;

	/*
	 * search down tree from root:
	 *
	 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
	 * internal page, child page Pi contains entry with k, Ki <= K < Kj.
	 *
	 * if entry with search key K is not found
	 * internal page search find the entry with largest key Ki
	 * less than K which point to the child page to search;
	 * leaf page search find the entry with smallest key Kj
	 * greater than K so that the returned index is the position of
	 * the entry to be shifted right for insertion of new entry.
	 * for empty tree, search key is greater than any key of the tree.
	 *
	 * by convention, root bn = 0.
	 */
	for (bn = 0;;) {
		/* get/pin the page to search */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;

		/* try sequential access heuristics with the previous
		 * access entry in target leaf page:
		 * once search narrowed down into the target leaf,
		 * key must either match an entry in the leaf or
		 * key entry does not exist in the tree;
		 */
//fastSearch:
		if ((jfs_ip->btorder & BT_SEQUENTIAL) &&
		    (p->header.flag & BT_LEAF) &&
		    (index = jfs_ip->btindex) <
		    le16_to_cpu(p->header.nextindex)) {
			xad = &p->xad[index];
			t64 = offsetXAD(xad);
			if (xoff < t64 + lengthXAD(xad)) {
				if (xoff >= t64) {
					*cmpp = 0;
					goto out;
				}

				/* stop sequential access heuristics */
				goto binarySearch;
			} else {	/* (t64 + lengthXAD(xad)) <= xoff */

				/* try next sequential entry */
				index++;
				if (index <
				    le16_to_cpu(p->header.nextindex)) {
					xad++;
					t64 = offsetXAD(xad);
					if (xoff < t64 + lengthXAD(xad)) {
						if (xoff >= t64) {
							*cmpp = 0;
							goto out;
						}

						/* miss: key falls between
						 * previous and this entry
						 */
						*cmpp = 1;
						next = t64;
						goto out;
					}

					/* (xoff >= t64 + lengthXAD(xad));
					 * matching entry may be further out:
					 * stop heuristic search
					 */
					/* stop sequential access heuristics */
					goto binarySearch;
				}

				/* (index == p->header.nextindex);
				 * miss: key entry does not exist in
				 * the target leaf/tree
				 */
				*cmpp = 1;
				goto out;
			}

			/*
			 * if hit, return index of the entry found, and
			 * if miss, where new entry with search key is
			 * to be inserted;
			 *
			 * N.B. the out: label sits INSIDE the fast-path
			 * block: only the heuristic paths above jump here.
			 */
		      out:
			/* compute number of pages to split */
			if (flag & XT_INSERT) {
				if (p->header.nextindex ==	/* little-endian */
				    p->header.maxentry)
					nsplit++;
				else
					nsplit = 0;
				btstack->nsplit = nsplit;
			}

			/* save search result */
			btsp = btstack->top;
			btsp->bn = bn;
			btsp->index = index;
			btsp->mp = mp;

			/* update sequential access heuristics */
			jfs_ip->btindex = index;

			if (nextp)
				*nextp = next;

			INCREMENT(xtStat.fastSearch);
			return 0;
		}

		/* well, ... full search now */
	      binarySearch:
		lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART;

		/*
		 * binary search with search key K on the current page
		 */
		for (base = XTENTRYSTART; lim; lim >>= 1) {
			index = base + (lim >> 1);

			XT_CMP(cmp, xoff, &p->xad[index], t64);
			if (cmp == 0) {
				/*
				 *	search hit
				 */
				/* search hit - leaf page:
				 * return the entry found
				 */
				if (p->header.flag & BT_LEAF) {
					*cmpp = cmp;

					/* compute number of pages to split */
					if (flag & XT_INSERT) {
						if (p->header.nextindex ==
						    p->header.maxentry)
							nsplit++;
						else
							nsplit = 0;
						btstack->nsplit = nsplit;
					}

					/* save search result */
					btsp = btstack->top;
					btsp->bn = bn;
					btsp->index = index;
					btsp->mp = mp;

					/* init sequential access heuristics */
					btindex = jfs_ip->btindex;
					if (index == btindex ||
					    index == btindex + 1)
						jfs_ip->btorder = BT_SEQUENTIAL;
					else
						jfs_ip->btorder = BT_RANDOM;
					jfs_ip->btindex = index;

					return 0;
				}
				/* search hit - internal page:
				 * descend/search its child page
				 */
				if (index < le16_to_cpu(p->header.nextindex)-1)
					next = offsetXAD(&p->xad[index + 1]);
				goto next;
			}

			if (cmp > 0) {
				base = index + 1;
				--lim;
			}
		}

		/*
		 * search miss
		 *
		 * base is the smallest index with key (Kj) greater than
		 * search key (K) and may be zero or maxentry index.
		 */
		if (base < le16_to_cpu(p->header.nextindex))
			next = offsetXAD(&p->xad[base]);
		/*
		 * search miss - leaf page:
		 *
		 * return location of entry (base) where new entry with
		 * search key K is to be inserted.
		 */
		if (p->header.flag & BT_LEAF) {
			*cmpp = cmp;

			/* compute number of pages to split */
			if (flag & XT_INSERT) {
				if (p->header.nextindex ==
				    p->header.maxentry)
					nsplit++;
				else
					nsplit = 0;
				btstack->nsplit = nsplit;
			}

			/* save search result */
			btsp = btstack->top;
			btsp->bn = bn;
			btsp->index = base;
			btsp->mp = mp;

			/* init sequential access heuristics */
			btindex = jfs_ip->btindex;
			if (base == btindex || base == btindex + 1)
				jfs_ip->btorder = BT_SEQUENTIAL;
			else
				jfs_ip->btorder = BT_RANDOM;
			jfs_ip->btindex = base;

			if (nextp)
				*nextp = next;

			return 0;
		}

		/*
		 * search miss - non-leaf page:
		 *
		 * if base is non-zero, decrement base by one to get the parent
		 * entry of the child page to search.
		 */
		index = base ? base - 1 : base;

		/*
		 * go down to child page
		 */
	      next:
		/* update number of pages to split */
		if (p->header.nextindex == p->header.maxentry)
			nsplit++;
		else
			nsplit = 0;

		/* push (bn, index) of the parent page/entry */
		if (BT_STACK_FULL(btstack)) {
			jfs_error(ip->i_sb, "stack overrun in xtSearch!");
			XT_PUTPAGE(mp);
			return -EIO;
		}
		BT_PUSH(btstack, bn, index);

		/* get the child page block number */
		bn = addressXAD(&p->xad[index]);

		/* unpin the parent page */
		XT_PUTPAGE(mp);
	}
}
774 774
/*
 *	xtInsert()
 *
 * function:
 *	insert a new xad entry mapping logical extent (xoff, xlen) to a
 *	physical extent, allocating the physical extent if the caller
 *	did not supply one;
 *
 * parameter:
 *	tid	- transaction id;
 *	ip	- file object;
 *	xflag	- extent flag (XAD_NOTRECORDED):
 *	xoff	- extent offset;
 *	xlen	- extent length;
 *	xaddrp	- extent address pointer (in/out):
 *		if (*xaddrp)
 *			caller allocated data extent at *xaddrp;
 *		else
 *			allocate data extent and return its xaddr;
 *	flag	-
 *		NOTE(review): not referenced in the body below; presumably
 *		reserved — confirm against callers.
 *
 * return:
 *	0 on success; -EEXIST if the range is already mapped or would
 *	overlap the next extent; other negative errnos from search,
 *	quota or block allocation, or page split.
 */
int xtInsert(tid_t tid,		/* transaction id */
	     struct inode *ip, int xflag, s64 xoff, s32 xlen, s64 * xaddrp,
	     int flag)
{
	int rc = 0;
	s64 xaddr, hint;
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* base B+-tree index page */
	s64 bn;
	int index, nextindex;
	struct btstack btstack;	/* traverse stack */
	struct xtsplit split;	/* split information */
	xad_t *xad;
	int cmp;
	s64 next;
	struct tlock *tlck;
	struct xtlock *xtlck;

	jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);

	/*
	 * search for the entry location at which to insert:
	 *
	 * xtFastSearch() and xtSearch() both returns (leaf page
	 * pinned, index at which to insert).
	 * n.b. xtSearch() may return index of maxentry of
	 * the full page.
	 */
	if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
		return rc;

	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	/* This test must follow XT_GETSEARCH since mp must be valid if
	 * we branch to out: */
	if ((cmp == 0) || (next && (xlen > next - xoff))) {
		rc = -EEXIST;
		goto out;
	}

	/*
	 * allocate data extent requested
	 *
	 * allocation hint: last xad
	 */
	if ((xaddr = *xaddrp) == 0) {
		/* hint with the block just past the previous entry's
		 * extent, for on-disk contiguity
		 */
		if (index > XTENTRYSTART) {
			xad = &p->xad[index - 1];
			hint = addressXAD(xad) + lengthXAD(xad) - 1;
		} else
			hint = 0;
		/* charge quota first; free it again if dbAlloc fails */
		if ((rc = DQUOT_ALLOC_BLOCK(ip, xlen)))
			goto out;
		if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) {
			DQUOT_FREE_BLOCK(ip, xlen);
			goto out;
		}
	}

	/*
	 * insert entry for new extent
	 */
	xflag |= XAD_NEW;

	/*
	 * if the leaf page is full, split the page and
	 * propagate up the router entry for the new page from split
	 *
	 * The xtSplitUp() will insert the entry and unpin the leaf page.
	 */
	nextindex = le16_to_cpu(p->header.nextindex);
	if (nextindex == le16_to_cpu(p->header.maxentry)) {
		split.mp = mp;
		split.index = index;
		split.flag = xflag;
		split.off = xoff;
		split.len = xlen;
		split.addr = xaddr;
		split.pxdlist = NULL;
		if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
			/* undo data extent allocation (only if we, not
			 * the caller, allocated it above)
			 */
			if (*xaddrp == 0) {
				dbFree(ip, xaddr, (s64) xlen);
				DQUOT_FREE_BLOCK(ip, xlen);
			}
			return rc;
		}

		*xaddrp = xaddr;
		return 0;
	}

	/*
	 * insert the new entry into the leaf page
	 */
	/*
	 * acquire a transaction lock on the leaf page;
	 *
	 * action: xad insertion/extension;
	 */
	BT_MARK_DIRTY(mp, ip);

	/* if insert into middle, shift right remaining entries. */
	if (index < nextindex)
		memmove(&p->xad[index + 1], &p->xad[index],
			(nextindex - index) * sizeof(xad_t));

	/* insert the new entry: mark the entry NEW */
	xad = &p->xad[index];
	XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);

	/* advance next available entry index (on-disk little-endian) */
	le16_add_cpu(&p->header.nextindex, 1);

	/* Don't log it if there are no links to the file */
	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
		xtlck = (struct xtlock *) & tlck->lock;
		/* widen the low-water-mark range to cover this insert */
		xtlck->lwm.offset =
		    (xtlck->lwm.offset) ? min(index,
					      (int)xtlck->lwm.offset) : index;
		xtlck->lwm.length =
		    le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
	}

	*xaddrp = xaddr;

      out:
	/* unpin the leaf page */
	XT_PUTPAGE(mp);

	return rc;
}
930 929
931 930
/*
 *	xtSplitUp()
 *
 * function:
 *	split full pages as propagating insertion up the tree
 *
 * parameter:
 *	tid	- transaction id;
 *	ip	- file object;
 *	split	- entry parameter descriptor;
 *	btstack	- traverse stack from xtSearch()
 *
 * return:
 *	0 on success; negative errno on failure.  All metapages pinned
 *	along the way are released before returning on every path.
 */
static int
xtSplitUp(tid_t tid,
	  struct inode *ip, struct xtsplit * split, struct btstack * btstack)
{
	int rc = 0;
	struct metapage *smp;
	xtpage_t *sp;		/* split page */
	struct metapage *rmp;
	s64 rbn;		/* new right page block number */
	struct metapage *rcmp;
	xtpage_t *rcp;		/* right child page */
	s64 rcbn;		/* right child page block number */
	int skip;		/* index of entry of insertion */
	int nextindex;		/* next available entry index of p */
	struct btframe *parent;	/* parent page entry on traverse stack */
	xad_t *xad;
	s64 xaddr;
	int xlen;
	int nsplit;		/* number of pages split */
	struct pxdlist pxdlist;
	pxd_t *pxd;
	struct tlock *tlck;
	struct xtlock *xtlck;

	smp = split->mp;
	sp = XT_PAGE(ip, smp);

	/* is inode xtree root extension/inline EA area free ?
	 *
	 * If the root still fits after reclaiming the inline EA slots,
	 * grow the root in place instead of splitting - no allocation
	 * and no new page needed.
	 */
	if ((sp->header.flag & BT_ROOT) && (!S_ISDIR(ip->i_mode)) &&
	    (le16_to_cpu(sp->header.maxentry) < XTROOTMAXSLOT) &&
	    (JFS_IP(ip)->mode2 & INLINEEA)) {
		sp->header.maxentry = cpu_to_le16(XTROOTMAXSLOT);
		JFS_IP(ip)->mode2 &= ~INLINEEA;

		BT_MARK_DIRTY(smp, ip);
		/*
		 * acquire a transaction lock on the leaf page;
		 *
		 * action: xad insertion/extension;
		 */

		/* if insert into middle, shift right remaining entries. */
		skip = split->index;
		nextindex = le16_to_cpu(sp->header.nextindex);
		if (skip < nextindex)
			memmove(&sp->xad[skip + 1], &sp->xad[skip],
				(nextindex - skip) * sizeof(xad_t));

		/* insert the new entry: mark the entry NEW */
		xad = &sp->xad[skip];
		XT_PUTENTRY(xad, split->flag, split->off, split->len,
			    split->addr);

		/* advance next available entry index */
		le16_add_cpu(&sp->header.nextindex, 1);

		/* Don't log it if there are no links to the file */
		if (!test_cflag(COMMIT_Nolink, ip)) {
			tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
			xtlck = (struct xtlock *) & tlck->lock;
			xtlck->lwm.offset = (xtlck->lwm.offset) ?
			    min(skip, (int)xtlck->lwm.offset) : skip;
			xtlck->lwm.length =
			    le16_to_cpu(sp->header.nextindex) -
			    xtlck->lwm.offset;
		}

		return 0;
	}

	/*
	 * allocate new index blocks to cover index page split(s)
	 *
	 * allocation hint: ?
	 */
	if (split->pxdlist == NULL) {
		nsplit = btstack->nsplit;
		split->pxdlist = &pxdlist;
		pxdlist.maxnpxd = pxdlist.npxd = 0;
		pxd = &pxdlist.pxd[0];
		xlen = JFS_SBI(ip->i_sb)->nbperpage;
		for (; nsplit > 0; nsplit--, pxd++) {
			if ((rc = dbAlloc(ip, (s64) 0, (s64) xlen, &xaddr))
			    == 0) {
				PXDaddress(pxd, xaddr);
				PXDlength(pxd, xlen);

				pxdlist.maxnpxd++;

				continue;
			}

			/* undo allocation */

			XT_PUTPAGE(smp);
			return rc;
		}
	}

	/*
	 * Split leaf page <sp> into <sp> and a new right page <rp>.
	 *
	 * The split routines insert the new entry into the leaf page,
	 * and acquire txLock as appropriate.
	 * return <rp> pinned and its block number <rpbn>.
	 */
	rc = (sp->header.flag & BT_ROOT) ?
	    xtSplitRoot(tid, ip, split, &rmp) :
	    xtSplitPage(tid, ip, split, &rmp, &rbn);

	XT_PUTPAGE(smp);

	if (rc)
		return -EIO;
	/*
	 * propagate up the router entry for the leaf page just split
	 *
	 * insert a router entry for the new page into the parent page,
	 * propagate the insert/split up the tree by walking back the stack
	 * of (bn of parent page, index of child page entry in parent page)
	 * that were traversed during the search for the page that split.
	 *
	 * the propagation of insert/split up the tree stops if the root
	 * splits or the page inserted into doesn't have to split to hold
	 * the new entry.
	 *
	 * the parent entry for the split page remains the same, and
	 * a new entry is inserted at its right with the first key and
	 * block number of the new right page.
	 *
	 * There are a maximum of 3 pages pinned at any time:
	 * right child, left parent and right parent (when the parent splits)
	 * to keep the child page pinned while working on the parent.
	 * make sure that all pins are released at exit.
	 */
	while ((parent = BT_POP(btstack)) != NULL) {
		/* parent page specified by stack frame <parent> */

		/* keep current child pages <rcp> pinned */
		rcmp = rmp;
		rcbn = rbn;
		rcp = XT_PAGE(ip, rcmp);

		/*
		 * insert router entry in parent for new right child page <rp>
		 */
		/* get/pin the parent page <sp> */
		XT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc);
		if (rc) {
			XT_PUTPAGE(rcmp);
			return rc;
		}

		/*
		 * The new key entry goes ONE AFTER the index of parent entry,
		 * because the split was to the right.
		 */
		skip = parent->index + 1;

		/*
		 * split or shift right remaining entries of the parent page
		 */
		nextindex = le16_to_cpu(sp->header.nextindex);
		/*
		 * parent page is full - split the parent page
		 */
		if (nextindex == le16_to_cpu(sp->header.maxentry)) {
			/* init for parent page split */
			split->mp = smp;
			split->index = skip;	/* index at insert */
			split->flag = XAD_NEW;
			split->off = offsetXAD(&rcp->xad[XTENTRYSTART]);
			split->len = JFS_SBI(ip->i_sb)->nbperpage;
			split->addr = rcbn;

			/* unpin previous right child page */
			XT_PUTPAGE(rcmp);

			/* The split routines insert the new entry,
			 * and acquire txLock as appropriate.
			 * return <rp> pinned and its block number <rpbn>.
			 */
			rc = (sp->header.flag & BT_ROOT) ?
			    xtSplitRoot(tid, ip, split, &rmp) :
			    xtSplitPage(tid, ip, split, &rmp, &rbn);
			if (rc) {
				XT_PUTPAGE(smp);
				return rc;
			}

			XT_PUTPAGE(smp);
			/* keep new child page <rp> pinned */
		}
		/*
		 * parent page is not full - insert in parent page
		 */
		else {
			/*
			 * insert router entry in parent for the right child
			 * page from the first entry of the right child page:
			 */
			/*
			 * acquire a transaction lock on the parent page;
			 *
			 * action: router xad insertion;
			 */
			BT_MARK_DIRTY(smp, ip);

			/*
			 * if insert into middle, shift right remaining entries
			 */
			if (skip < nextindex)
				memmove(&sp->xad[skip + 1], &sp->xad[skip],
					(nextindex -
					 skip) << L2XTSLOTSIZE);

			/* insert the router entry */
			xad = &sp->xad[skip];
			XT_PUTENTRY(xad, XAD_NEW,
				    offsetXAD(&rcp->xad[XTENTRYSTART]),
				    JFS_SBI(ip->i_sb)->nbperpage, rcbn);

			/* advance next available entry index. */
			le16_add_cpu(&sp->header.nextindex, 1);

			/* Don't log it if there are no links to the file */
			if (!test_cflag(COMMIT_Nolink, ip)) {
				tlck = txLock(tid, ip, smp,
					      tlckXTREE | tlckGROW);
				xtlck = (struct xtlock *) & tlck->lock;
				xtlck->lwm.offset = (xtlck->lwm.offset) ?
				    min(skip, (int)xtlck->lwm.offset) : skip;
				xtlck->lwm.length =
				    le16_to_cpu(sp->header.nextindex) -
				    xtlck->lwm.offset;
			}

			/* unpin parent page */
			XT_PUTPAGE(smp);

			/* exit propagate up */
			break;
		}
	}

	/* unpin current right page */
	XT_PUTPAGE(rmp);

	return 0;
}
1199 1195
1200 1196
/*
 *	xtSplitPage()
 *
 * function:
 *	split a full non-root page into
 *	original/split/left page and new right page
 *	i.e., the original/split page remains as left page.
 *
 * parameter:
 *	int		tid,
 *	struct inode	*ip,
 *	struct xtsplit	*split,
 *	struct metapage	**rmpp,
 *	u64		*rbnp,
 *
 * return:
 *	0 on success with *rmpp/*rbnp set to the pinned new right page
 *	and its block number; negative errno on failure (quota rolled
 *	back via clean_up).
 */
static int
xtSplitPage(tid_t tid, struct inode *ip,
	    struct xtsplit * split, struct metapage ** rmpp, s64 * rbnp)
{
	int rc = 0;
	struct metapage *smp;
	xtpage_t *sp;
	struct metapage *rmp;
	xtpage_t *rp;		/* new right page allocated */
	s64 rbn;		/* new right page block number */
	struct metapage *mp;
	xtpage_t *p;
	s64 nextbn;
	int skip, maxentry, middle, righthalf, n;
	xad_t *xad;
	struct pxdlist *pxdlist;
	pxd_t *pxd;
	struct tlock *tlck;
	struct xtlock *sxtlck = NULL, *rxtlck = NULL;
	int quota_allocation = 0;

	smp = split->mp;
	sp = XT_PAGE(ip, smp);

	INCREMENT(xtStat.split);

	/* consume the next preallocated pxd for the new right page */
	pxdlist = split->pxdlist;
	pxd = &pxdlist->pxd[pxdlist->npxd];
	pxdlist->npxd++;
	rbn = addressPXD(pxd);

	/* Allocate blocks to quota. */
	if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) {
		rc = -EDQUOT;
		goto clean_up;
	}

	quota_allocation += lengthPXD(pxd);

	/*
	 * allocate the new right page for the split
	 */
	rmp = get_metapage(ip, rbn, PSIZE, 1);
	if (rmp == NULL) {
		rc = -EIO;
		goto clean_up;
	}

	jfs_info("xtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp);

	BT_MARK_DIRTY(rmp, ip);
	/*
	 * action: new page;
	 */

	rp = (xtpage_t *) rmp->data;
	rp->header.self = *pxd;
	rp->header.flag = sp->header.flag & BT_TYPE;
	rp->header.maxentry = sp->header.maxentry;	/* little-endian */
	rp->header.nextindex = cpu_to_le16(XTENTRYSTART);

	BT_MARK_DIRTY(smp, ip);
	/* Don't log it if there are no links to the file */
	if (!test_cflag(COMMIT_Nolink, ip)) {
		/*
		 * acquire a transaction lock on the new right page;
		 */
		tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
		rxtlck = (struct xtlock *) & tlck->lock;
		rxtlck->lwm.offset = XTENTRYSTART;
		/*
		 * acquire a transaction lock on the split page
		 */
		tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
		sxtlck = (struct xtlock *) & tlck->lock;
	}

	/*
	 * initialize/update sibling pointers of <sp> and <rp>
	 */
	nextbn = le64_to_cpu(sp->header.next);
	rp->header.next = cpu_to_le64(nextbn);
	rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self));
	sp->header.next = cpu_to_le64(rbn);

	skip = split->index;

	/*
	 * sequential append at tail (after last entry of last page)
	 *
	 * if splitting the last page on a level because of appending
	 * a entry to it (skip is maxentry), it's likely that the access is
	 * sequential. adding an empty page on the side of the level is less
	 * work and can push the fill factor much higher than normal.
	 * if we're wrong it's no big deal - we will do the split the right
	 * way next time.
	 * (it may look like it's equally easy to do a similar hack for
	 * reverse sorted data, that is, split the tree left, but it's not.
	 * Be my guest.)
	 */
	if (nextbn == 0 && skip == le16_to_cpu(sp->header.maxentry)) {
		/*
		 * acquire a transaction lock on the new/right page;
		 *
		 * action: xad insertion;
		 */
		/* insert entry at the first entry of the new right page */
		xad = &rp->xad[XTENTRYSTART];
		XT_PUTENTRY(xad, split->flag, split->off, split->len,
			    split->addr);

		rp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);

		if (!test_cflag(COMMIT_Nolink, ip)) {
			/* rxtlck->lwm.offset = XTENTRYSTART; */
			rxtlck->lwm.length = 1;
		}

		*rmpp = rmp;
		*rbnp = rbn;

		jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
		return 0;
	}

	/*
	 * non-sequential insert (at possibly middle page)
	 */

	/*
	 * update previous pointer of old next/right page of <sp>
	 */
	if (nextbn != 0) {
		XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
		if (rc) {
			XT_PUTPAGE(rmp);
			goto clean_up;
		}

		BT_MARK_DIRTY(mp, ip);
		/*
		 * acquire a transaction lock on the next page;
		 *
		 * action:sibling pointer update;
		 */
		if (!test_cflag(COMMIT_Nolink, ip))
			tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);

		p->header.prev = cpu_to_le64(rbn);

		/* sibling page may have been updated previously, or
		 * it may be updated later;
		 */

		XT_PUTPAGE(mp);
	}

	/*
	 * split the data between the split and new/right pages
	 */
	maxentry = le16_to_cpu(sp->header.maxentry);
	middle = maxentry >> 1;
	righthalf = maxentry - middle;

	/*
	 * skip index in old split/left page - insert into left page:
	 */
	if (skip <= middle) {
		/* move right half of split page to the new right page */
		memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
			righthalf << L2XTSLOTSIZE);

		/* shift right tail of left half to make room for new entry */
		if (skip < middle)
			memmove(&sp->xad[skip + 1], &sp->xad[skip],
				(middle - skip) << L2XTSLOTSIZE);

		/* insert new entry */
		xad = &sp->xad[skip];
		XT_PUTENTRY(xad, split->flag, split->off, split->len,
			    split->addr);

		/* update page header */
		sp->header.nextindex = cpu_to_le16(middle + 1);
		if (!test_cflag(COMMIT_Nolink, ip)) {
			sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
			    min(skip, (int)sxtlck->lwm.offset) : skip;
		}

		rp->header.nextindex =
		    cpu_to_le16(XTENTRYSTART + righthalf);
	}
	/*
	 * skip index in new right page - insert into right page:
	 */
	else {
		/* move left head of right half to right page */
		n = skip - middle;
		memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
			n << L2XTSLOTSIZE);

		/* insert new entry */
		n += XTENTRYSTART;
		xad = &rp->xad[n];
		XT_PUTENTRY(xad, split->flag, split->off, split->len,
			    split->addr);

		/* move right tail of right half to right page */
		if (skip < maxentry)
			memmove(&rp->xad[n + 1], &sp->xad[skip],
				(maxentry - skip) << L2XTSLOTSIZE);

		/* update page header */
		sp->header.nextindex = cpu_to_le16(middle);
		if (!test_cflag(COMMIT_Nolink, ip)) {
			sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
			    min(middle, (int)sxtlck->lwm.offset) : middle;
		}

		rp->header.nextindex = cpu_to_le16(XTENTRYSTART +
						   righthalf + 1);
	}

	if (!test_cflag(COMMIT_Nolink, ip)) {
		sxtlck->lwm.length = le16_to_cpu(sp->header.nextindex) -
		    sxtlck->lwm.offset;

		/* rxtlck->lwm.offset = XTENTRYSTART; */
		rxtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
		    XTENTRYSTART;
	}

	*rmpp = rmp;
	*rbnp = rbn;

	jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
	return rc;

      clean_up:

	/* Rollback quota allocation. */
	if (quota_allocation)
		DQUOT_FREE_BLOCK(ip, quota_allocation);

	return (rc);
}
1465 1461
1466 1462
/*
 *	xtSplitRoot()
 *
 * function:
 *	split the full root page into original/root/split page and new
 *	right page
 *	i.e., root remains fixed in tree anchor (inode) and the root is
 *	copied to a single new right child page since root page <<
 *	non-root page, and the split root page contains a single entry
 *	for the new right child page.
 *
 * parameter:
 *	int		tid,
 *	struct inode	*ip,
 *	struct xtsplit	*split,
 *	struct metapage	**rmpp)
 *
 * return:
 *	0 on success; -EIO if the right child page cannot be read,
 *	-EDQUOT if the quota allocation for the new page fails.
 *	On success *rmpp is set to the pinned new right child page.
 */
static int
xtSplitRoot(tid_t tid,
	    struct inode *ip, struct xtsplit * split, struct metapage ** rmpp)
{
	xtpage_t *sp;
	struct metapage *rmp;
	xtpage_t *rp;
	s64 rbn;
	int skip, nextindex;
	xad_t *xad;
	pxd_t *pxd;
	struct pxdlist *pxdlist;
	struct tlock *tlck;
	struct xtlock *xtlck;

	/* the root page is the in-line xtree root inside the inode */
	sp = &JFS_IP(ip)->i_xtroot;

	INCREMENT(xtStat.split);

	/*
	 *	allocate a single (right) child page
	 *
	 * the pxd for the new page was pre-allocated by the caller and
	 * is consumed from the split's pxdlist here.
	 */
	pxdlist = split->pxdlist;
	pxd = &pxdlist->pxd[pxdlist->npxd];
	pxdlist->npxd++;
	rbn = addressPXD(pxd);
	rmp = get_metapage(ip, rbn, PSIZE, 1);
	if (rmp == NULL)
		return -EIO;

	/* Allocate blocks to quota. */
	if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) {
		release_metapage(rmp);
		return -EDQUOT;
	}

	jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp);

	/*
	 * acquire a transaction lock on the new right page;
	 *
	 * action: new page;
	 */
	BT_MARK_DIRTY(rmp, ip);

	/* the new child inherits leaf/internal type from the root */
	rp = (xtpage_t *) rmp->data;
	rp->header.flag =
	    (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL;
	rp->header.self = *pxd;
	rp->header.nextindex = cpu_to_le16(XTENTRYSTART);
	rp->header.maxentry = cpu_to_le16(PSIZE >> L2XTSLOTSIZE);

	/* initialize sibling pointers */
	rp->header.next = 0;
	rp->header.prev = 0;

	/*
	 * copy the in-line root page into new right page extent
	 */
	nextindex = le16_to_cpu(sp->header.maxentry);
	memmove(&rp->xad[XTENTRYSTART], &sp->xad[XTENTRYSTART],
		(nextindex - XTENTRYSTART) << L2XTSLOTSIZE);

	/*
	 * insert the new entry into the new right/child page
	 * (skip index in the new right page will not change)
	 */
	skip = split->index;
	/* if insert into middle, shift right remaining entries */
	if (skip != nextindex)
		memmove(&rp->xad[skip + 1], &rp->xad[skip],
			(nextindex - skip) * sizeof(xad_t));

	xad = &rp->xad[skip];
	XT_PUTENTRY(xad, split->flag, split->off, split->len, split->addr);

	/* update page header */
	rp->header.nextindex = cpu_to_le16(nextindex + 1);

	if (!test_cflag(COMMIT_Nolink, ip)) {
		/* log the whole populated region of the new page */
		tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
		xtlck = (struct xtlock *) & tlck->lock;
		xtlck->lwm.offset = XTENTRYSTART;
		xtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
		    XTENTRYSTART;
	}

	/*
	 *	reset the root
	 *
	 * init root with the single entry for the new right page
	 * set the 1st entry offset to 0, which force the left-most key
	 * at any level of the tree to be less than any search key.
	 */
	/*
	 * acquire a transaction lock on the root page (in-memory inode);
	 *
	 * action: root split;
	 */
	BT_MARK_DIRTY(split->mp, ip);

	xad = &sp->xad[XTENTRYSTART];
	XT_PUTENTRY(xad, XAD_NEW, 0, JFS_SBI(ip->i_sb)->nbperpage, rbn);

	/* update page header of root: it is now an internal page */
	sp->header.flag &= ~BT_LEAF;
	sp->header.flag |= BT_INTERNAL;

	sp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);

	if (!test_cflag(COMMIT_Nolink, ip)) {
		/* log the single router entry left in the root */
		tlck = txLock(tid, ip, split->mp, tlckXTREE | tlckGROW);
		xtlck = (struct xtlock *) & tlck->lock;
		xtlck->lwm.offset = XTENTRYSTART;
		xtlck->lwm.length = 1;
	}

	/* return the new right page still pinned for the caller */
	*rmpp = rmp;

	jfs_info("xtSplitRoot: sp:0x%p rp:0x%p", sp, rp);
	return 0;
}
1609 1605
1610 1606
1611 /* 1607 /*
1612 * xtExtend() 1608 * xtExtend()
1613 * 1609 *
1614 * function: extend in-place; 1610 * function: extend in-place;
1615 * 1611 *
1616 * note: existing extent may or may not have been committed. 1612 * note: existing extent may or may not have been committed.
1617 * caller is responsible for pager buffer cache update, and 1613 * caller is responsible for pager buffer cache update, and
1618 * working block allocation map update; 1614 * working block allocation map update;
1619 * update pmap: alloc whole extended extent; 1615 * update pmap: alloc whole extended extent;
1620 */ 1616 */
1621 int xtExtend(tid_t tid, /* transaction id */ 1617 int xtExtend(tid_t tid, /* transaction id */
1622 struct inode *ip, s64 xoff, /* delta extent offset */ 1618 struct inode *ip, s64 xoff, /* delta extent offset */
1623 s32 xlen, /* delta extent length */ 1619 s32 xlen, /* delta extent length */
1624 int flag) 1620 int flag)
1625 { 1621 {
1626 int rc = 0; 1622 int rc = 0;
1627 int cmp; 1623 int cmp;
1628 struct metapage *mp; /* meta-page buffer */ 1624 struct metapage *mp; /* meta-page buffer */
1629 xtpage_t *p; /* base B+-tree index page */ 1625 xtpage_t *p; /* base B+-tree index page */
1630 s64 bn; 1626 s64 bn;
1631 int index, nextindex, len; 1627 int index, nextindex, len;
1632 struct btstack btstack; /* traverse stack */ 1628 struct btstack btstack; /* traverse stack */
1633 struct xtsplit split; /* split information */ 1629 struct xtsplit split; /* split information */
1634 xad_t *xad; 1630 xad_t *xad;
1635 s64 xaddr; 1631 s64 xaddr;
1636 struct tlock *tlck; 1632 struct tlock *tlck;
1637 struct xtlock *xtlck = NULL; 1633 struct xtlock *xtlck = NULL;
1638 1634
1639 jfs_info("xtExtend: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen); 1635 jfs_info("xtExtend: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);
1640 1636
1641 /* there must exist extent to be extended */ 1637 /* there must exist extent to be extended */
1642 if ((rc = xtSearch(ip, xoff - 1, NULL, &cmp, &btstack, XT_INSERT))) 1638 if ((rc = xtSearch(ip, xoff - 1, NULL, &cmp, &btstack, XT_INSERT)))
1643 return rc; 1639 return rc;
1644 1640
1645 /* retrieve search result */ 1641 /* retrieve search result */
1646 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 1642 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
1647 1643
1648 if (cmp != 0) { 1644 if (cmp != 0) {
1649 XT_PUTPAGE(mp); 1645 XT_PUTPAGE(mp);
1650 jfs_error(ip->i_sb, "xtExtend: xtSearch did not find extent"); 1646 jfs_error(ip->i_sb, "xtExtend: xtSearch did not find extent");
1651 return -EIO; 1647 return -EIO;
1652 } 1648 }
1653 1649
1654 /* extension must be contiguous */ 1650 /* extension must be contiguous */
1655 xad = &p->xad[index]; 1651 xad = &p->xad[index];
1656 if ((offsetXAD(xad) + lengthXAD(xad)) != xoff) { 1652 if ((offsetXAD(xad) + lengthXAD(xad)) != xoff) {
1657 XT_PUTPAGE(mp); 1653 XT_PUTPAGE(mp);
1658 jfs_error(ip->i_sb, "xtExtend: extension is not contiguous"); 1654 jfs_error(ip->i_sb, "xtExtend: extension is not contiguous");
1659 return -EIO; 1655 return -EIO;
1660 } 1656 }
1661 1657
1662 /* 1658 /*
1663 * acquire a transaction lock on the leaf page; 1659 * acquire a transaction lock on the leaf page;
1664 * 1660 *
1665 * action: xad insertion/extension; 1661 * action: xad insertion/extension;
1666 */ 1662 */
1667 BT_MARK_DIRTY(mp, ip); 1663 BT_MARK_DIRTY(mp, ip);
1668 if (!test_cflag(COMMIT_Nolink, ip)) { 1664 if (!test_cflag(COMMIT_Nolink, ip)) {
1669 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 1665 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
1670 xtlck = (struct xtlock *) & tlck->lock; 1666 xtlck = (struct xtlock *) & tlck->lock;
1671 } 1667 }
1672 1668
1673 /* extend will overflow extent ? */ 1669 /* extend will overflow extent ? */
1674 xlen = lengthXAD(xad) + xlen; 1670 xlen = lengthXAD(xad) + xlen;
1675 if ((len = xlen - MAXXLEN) <= 0) 1671 if ((len = xlen - MAXXLEN) <= 0)
1676 goto extendOld; 1672 goto extendOld;
1677 1673
1678 /* 1674 /*
1679 * extent overflow: insert entry for new extent 1675 * extent overflow: insert entry for new extent
1680 */ 1676 */
1681 //insertNew: 1677 //insertNew:
1682 xoff = offsetXAD(xad) + MAXXLEN; 1678 xoff = offsetXAD(xad) + MAXXLEN;
1683 xaddr = addressXAD(xad) + MAXXLEN; 1679 xaddr = addressXAD(xad) + MAXXLEN;
1684 nextindex = le16_to_cpu(p->header.nextindex); 1680 nextindex = le16_to_cpu(p->header.nextindex);
1685 1681
1686 /* 1682 /*
1687 * if the leaf page is full, insert the new entry and 1683 * if the leaf page is full, insert the new entry and
1688 * propagate up the router entry for the new page from split 1684 * propagate up the router entry for the new page from split
1689 * 1685 *
1690 * The xtSplitUp() will insert the entry and unpin the leaf page. 1686 * The xtSplitUp() will insert the entry and unpin the leaf page.
1691 */ 1687 */
1692 if (nextindex == le16_to_cpu(p->header.maxentry)) { 1688 if (nextindex == le16_to_cpu(p->header.maxentry)) {
1693 /* xtSpliUp() unpins leaf pages */ 1689 /* xtSpliUp() unpins leaf pages */
1694 split.mp = mp; 1690 split.mp = mp;
1695 split.index = index + 1; 1691 split.index = index + 1;
1696 split.flag = XAD_NEW; 1692 split.flag = XAD_NEW;
1697 split.off = xoff; /* split offset */ 1693 split.off = xoff; /* split offset */
1698 split.len = len; 1694 split.len = len;
1699 split.addr = xaddr; 1695 split.addr = xaddr;
1700 split.pxdlist = NULL; 1696 split.pxdlist = NULL;
1701 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) 1697 if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
1702 return rc; 1698 return rc;
1703 1699
1704 /* get back old page */ 1700 /* get back old page */
1705 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 1701 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1706 if (rc) 1702 if (rc)
1707 return rc; 1703 return rc;
1708 /* 1704 /*
1709 * if leaf root has been split, original root has been 1705 * if leaf root has been split, original root has been
1710 * copied to new child page, i.e., original entry now 1706 * copied to new child page, i.e., original entry now
1711 * resides on the new child page; 1707 * resides on the new child page;
1712 */ 1708 */
1713 if (p->header.flag & BT_INTERNAL) { 1709 if (p->header.flag & BT_INTERNAL) {
1714 ASSERT(p->header.nextindex == 1710 ASSERT(p->header.nextindex ==
1715 cpu_to_le16(XTENTRYSTART + 1)); 1711 cpu_to_le16(XTENTRYSTART + 1));
1716 xad = &p->xad[XTENTRYSTART]; 1712 xad = &p->xad[XTENTRYSTART];
1717 bn = addressXAD(xad); 1713 bn = addressXAD(xad);
1718 XT_PUTPAGE(mp); 1714 XT_PUTPAGE(mp);
1719 1715
1720 /* get new child page */ 1716 /* get new child page */
1721 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 1717 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1722 if (rc) 1718 if (rc)
1723 return rc; 1719 return rc;
1724 1720
1725 BT_MARK_DIRTY(mp, ip); 1721 BT_MARK_DIRTY(mp, ip);
1726 if (!test_cflag(COMMIT_Nolink, ip)) { 1722 if (!test_cflag(COMMIT_Nolink, ip)) {
1727 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); 1723 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
1728 xtlck = (struct xtlock *) & tlck->lock; 1724 xtlck = (struct xtlock *) & tlck->lock;
1729 } 1725 }
1730 } 1726 }
1731 } 1727 }
1732 /* 1728 /*
1733 * insert the new entry into the leaf page 1729 * insert the new entry into the leaf page
1734 */ 1730 */
1735 else { 1731 else {
1736 /* insert the new entry: mark the entry NEW */ 1732 /* insert the new entry: mark the entry NEW */
1737 xad = &p->xad[index + 1]; 1733 xad = &p->xad[index + 1];
1738 XT_PUTENTRY(xad, XAD_NEW, xoff, len, xaddr); 1734 XT_PUTENTRY(xad, XAD_NEW, xoff, len, xaddr);
1739 1735
1740 /* advance next available entry index */ 1736 /* advance next available entry index */
1741 p->header.nextindex = 1737 le16_add_cpu(&p->header.nextindex, 1);
1742 cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
1743 } 1738 }
1744 1739
1745 /* get back old entry */ 1740 /* get back old entry */
1746 xad = &p->xad[index]; 1741 xad = &p->xad[index];
1747 xlen = MAXXLEN; 1742 xlen = MAXXLEN;
1748 1743
1749 /* 1744 /*
1750 * extend old extent 1745 * extend old extent
1751 */ 1746 */
1752 extendOld: 1747 extendOld:
1753 XADlength(xad, xlen); 1748 XADlength(xad, xlen);
1754 if (!(xad->flag & XAD_NEW)) 1749 if (!(xad->flag & XAD_NEW))
1755 xad->flag |= XAD_EXTENDED; 1750 xad->flag |= XAD_EXTENDED;
1756 1751
1757 if (!test_cflag(COMMIT_Nolink, ip)) { 1752 if (!test_cflag(COMMIT_Nolink, ip)) {
1758 xtlck->lwm.offset = 1753 xtlck->lwm.offset =
1759 (xtlck->lwm.offset) ? min(index, 1754 (xtlck->lwm.offset) ? min(index,
1760 (int)xtlck->lwm.offset) : index; 1755 (int)xtlck->lwm.offset) : index;
1761 xtlck->lwm.length = 1756 xtlck->lwm.length =
1762 le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset; 1757 le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
1763 } 1758 }
1764 1759
1765 /* unpin the leaf page */ 1760 /* unpin the leaf page */
1766 XT_PUTPAGE(mp); 1761 XT_PUTPAGE(mp);
1767 1762
1768 return rc; 1763 return rc;
1769 } 1764 }
1770 1765
#ifdef _NOTYET
/*
 *	xtTailgate()
 *
 * function: split existing 'tail' extent
 *	(split offset >= start offset of tail extent), and
 *	relocate and extend the split tail half;
 *
 * note: existing extent may or may not have been committed.
 * caller is responsible for pager buffer cache update, and
 * working block allocation map update;
 * update pmap: free old split tail extent, alloc new extent;
 *
 * return: 0 on success; -EIO if the extent at xoff cannot be found or
 *	is not the last entry of its leaf page; errors from
 *	xtSearch()/xtSplitUp()/XT_GETPAGE() are propagated.
 */
int xtTailgate(tid_t tid,		/* transaction id */
	       struct inode *ip, s64 xoff,	/* split/new extent offset */
	       s32 xlen,	/* new extent length */
	       s64 xaddr,	/* new extent address */
	       int flag)
{
	int rc = 0;
	int cmp;
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* base B+-tree index page */
	s64 bn;
	int index, nextindex, llen, rlen;
	struct btstack btstack;	/* traverse stack */
	struct xtsplit split;	/* split information */
	xad_t *xad;
	struct tlock *tlck;
	struct xtlock *xtlck = NULL;
	struct tlock *mtlck;
	struct maplock *pxdlock;

/*
printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n",
	(ulong)xoff, xlen, (ulong)xaddr);
*/

	/* there must exist extent to be tailgated */
	if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, XT_INSERT)))
		return rc;

	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	if (cmp != 0) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb, "xtTailgate: couldn't find extent");
		return -EIO;
	}

	/* entry found must be last entry */
	nextindex = le16_to_cpu(p->header.nextindex);
	if (index != nextindex - 1) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb,
			  "xtTailgate: the entry found is not the last entry");
		return -EIO;
	}

	BT_MARK_DIRTY(mp, ip);
	/*
	 * acquire tlock of the leaf page containing original entry
	 */
	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
		xtlck = (struct xtlock *) & tlck->lock;
	}

	/* completely replace extent ? */
	xad = &p->xad[index];
/*
printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n",
	(ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad));
*/
	/* llen is the surviving left half; zero means full replacement */
	if ((llen = xoff - offsetXAD(xad)) == 0)
		goto updateOld;

	/*
	 *	partially replace extent: insert entry for new extent
	 */
//insertNew:
	/*
	 *	if the leaf page is full, insert the new entry and
	 *	propagate up the router entry for the new page from split
	 *
	 * The xtSplitUp() will insert the entry and unpin the leaf page.
	 */
	if (nextindex == le16_to_cpu(p->header.maxentry)) {
		/* xtSpliUp() unpins leaf pages */
		split.mp = mp;
		split.index = index + 1;
		split.flag = XAD_NEW;
		split.off = xoff;	/* split offset */
		split.len = xlen;
		split.addr = xaddr;
		split.pxdlist = NULL;
		if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
			return rc;

		/* get back old page */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;
		/*
		 * if leaf root has been split, original root has been
		 * copied to new child page, i.e., original entry now
		 * resides on the new child page;
		 */
		if (p->header.flag & BT_INTERNAL) {
			ASSERT(p->header.nextindex ==
			       cpu_to_le16(XTENTRYSTART + 1));
			xad = &p->xad[XTENTRYSTART];
			bn = addressXAD(xad);
			XT_PUTPAGE(mp);

			/* get new child page */
			XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
			if (rc)
				return rc;

			BT_MARK_DIRTY(mp, ip);
			if (!test_cflag(COMMIT_Nolink, ip)) {
				tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
				xtlck = (struct xtlock *) & tlck->lock;
			}
		}
	}
	/*
	 *	insert the new entry into the leaf page
	 */
	else {
		/* insert the new entry: mark the entry NEW */
		xad = &p->xad[index + 1];
		XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr);

		/* advance next available entry index */
		le16_add_cpu(&p->header.nextindex, 1);
	}

	/* get back old XAD */
	xad = &p->xad[index];

	/*
	 * truncate/relocate old extent at split offset
	 */
      updateOld:
	/* update dmap for old/committed/truncated extent */
	rlen = lengthXAD(xad) - llen;
	if (!(xad->flag & XAD_NEW)) {
		/* free from PWMAP at commit */
		if (!test_cflag(COMMIT_Nolink, ip)) {
			mtlck = txMaplock(tid, ip, tlckMAP);
			pxdlock = (struct maplock *) & mtlck->lock;
			pxdlock->flag = mlckFREEPXD;
			PXDaddress(&pxdlock->pxd, addressXAD(xad) + llen);
			PXDlength(&pxdlock->pxd, rlen);
			pxdlock->index = 1;
		}
	} else
		/* free from WMAP */
		dbFree(ip, addressXAD(xad) + llen, (s64) rlen);

	if (llen)
		/* truncate */
		XADlength(xad, llen);
	else
		/* replace */
		XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr);

	if (!test_cflag(COMMIT_Nolink, ip)) {
		/* widen the low-water-mark log range to cover this entry */
		xtlck->lwm.offset = (xtlck->lwm.offset) ?
		    min(index, (int)xtlck->lwm.offset) : index;
		xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
		    xtlck->lwm.offset;
	}

	/* unpin the leaf page */
	XT_PUTPAGE(mp);

	return rc;
}
#endif /* _NOTYET */
1955 1949
1956 /* 1950 /*
1957 * xtUpdate() 1951 * xtUpdate()
1958 * 1952 *
1959 * function: update XAD; 1953 * function: update XAD;
1960 * 1954 *
1961 * update extent for allocated_but_not_recorded or 1955 * update extent for allocated_but_not_recorded or
1962 * compressed extent; 1956 * compressed extent;
1963 * 1957 *
1964 * parameter: 1958 * parameter:
1965 * nxad - new XAD; 1959 * nxad - new XAD;
1966 * logical extent of the specified XAD must be completely 1960 * logical extent of the specified XAD must be completely
1967 * contained by an existing XAD; 1961 * contained by an existing XAD;
1968 */ 1962 */
1969 int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad) 1963 int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad)
1970 { /* new XAD */ 1964 { /* new XAD */
1971 int rc = 0; 1965 int rc = 0;
1972 int cmp; 1966 int cmp;
1973 struct metapage *mp; /* meta-page buffer */ 1967 struct metapage *mp; /* meta-page buffer */
1974 xtpage_t *p; /* base B+-tree index page */ 1968 xtpage_t *p; /* base B+-tree index page */
1975 s64 bn; 1969 s64 bn;
1976 int index0, index, newindex, nextindex; 1970 int index0, index, newindex, nextindex;
1977 struct btstack btstack; /* traverse stack */ 1971 struct btstack btstack; /* traverse stack */
1978 struct xtsplit split; /* split information */ 1972 struct xtsplit split; /* split information */
1979 xad_t *xad, *lxad, *rxad; 1973 xad_t *xad, *lxad, *rxad;
1980 int xflag; 1974 int xflag;
1981 s64 nxoff, xoff; 1975 s64 nxoff, xoff;
1982 int nxlen, xlen, lxlen, rxlen; 1976 int nxlen, xlen, lxlen, rxlen;
1983 s64 nxaddr, xaddr; 1977 s64 nxaddr, xaddr;
1984 struct tlock *tlck; 1978 struct tlock *tlck;
1985 struct xtlock *xtlck = NULL; 1979 struct xtlock *xtlck = NULL;
1986 int newpage = 0; 1980 int newpage = 0;
1987 1981
1988 /* there must exist extent to be tailgated */ 1982 /* there must exist extent to be tailgated */
1989 nxoff = offsetXAD(nxad); 1983 nxoff = offsetXAD(nxad);
1990 nxlen = lengthXAD(nxad); 1984 nxlen = lengthXAD(nxad);
1991 nxaddr = addressXAD(nxad); 1985 nxaddr = addressXAD(nxad);
1992 1986
1993 if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT))) 1987 if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT)))
1994 return rc; 1988 return rc;
1995 1989
1996 /* retrieve search result */ 1990 /* retrieve search result */
1997 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0); 1991 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0);
1998 1992
1999 if (cmp != 0) { 1993 if (cmp != 0) {
2000 XT_PUTPAGE(mp); 1994 XT_PUTPAGE(mp);
2001 jfs_error(ip->i_sb, "xtUpdate: Could not find extent"); 1995 jfs_error(ip->i_sb, "xtUpdate: Could not find extent");
2002 return -EIO; 1996 return -EIO;
2003 } 1997 }
2004 1998
2005 BT_MARK_DIRTY(mp, ip); 1999 BT_MARK_DIRTY(mp, ip);
2006 /* 2000 /*
2007 * acquire tlock of the leaf page containing original entry 2001 * acquire tlock of the leaf page containing original entry
2008 */ 2002 */
2009 if (!test_cflag(COMMIT_Nolink, ip)) { 2003 if (!test_cflag(COMMIT_Nolink, ip)) {
2010 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 2004 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
2011 xtlck = (struct xtlock *) & tlck->lock; 2005 xtlck = (struct xtlock *) & tlck->lock;
2012 } 2006 }
2013 2007
2014 xad = &p->xad[index0]; 2008 xad = &p->xad[index0];
2015 xflag = xad->flag; 2009 xflag = xad->flag;
2016 xoff = offsetXAD(xad); 2010 xoff = offsetXAD(xad);
2017 xlen = lengthXAD(xad); 2011 xlen = lengthXAD(xad);
2018 xaddr = addressXAD(xad); 2012 xaddr = addressXAD(xad);
2019 2013
2020 /* nXAD must be completely contained within XAD */ 2014 /* nXAD must be completely contained within XAD */
2021 if ((xoff > nxoff) || 2015 if ((xoff > nxoff) ||
2022 (nxoff + nxlen > xoff + xlen)) { 2016 (nxoff + nxlen > xoff + xlen)) {
2023 XT_PUTPAGE(mp); 2017 XT_PUTPAGE(mp);
2024 jfs_error(ip->i_sb, 2018 jfs_error(ip->i_sb,
2025 "xtUpdate: nXAD in not completely contained within XAD"); 2019 "xtUpdate: nXAD in not completely contained within XAD");
2026 return -EIO; 2020 return -EIO;
2027 } 2021 }
2028 2022
2029 index = index0; 2023 index = index0;
2030 newindex = index + 1; 2024 newindex = index + 1;
2031 nextindex = le16_to_cpu(p->header.nextindex); 2025 nextindex = le16_to_cpu(p->header.nextindex);
2032 2026
2033 #ifdef _JFS_WIP_NOCOALESCE 2027 #ifdef _JFS_WIP_NOCOALESCE
2034 if (xoff < nxoff) 2028 if (xoff < nxoff)
2035 goto updateRight; 2029 goto updateRight;
2036 2030
2037 /* 2031 /*
2038 * replace XAD with nXAD 2032 * replace XAD with nXAD
2039 */ 2033 */
2040 replace: /* (nxoff == xoff) */ 2034 replace: /* (nxoff == xoff) */
2041 if (nxlen == xlen) { 2035 if (nxlen == xlen) {
2042 /* replace XAD with nXAD:recorded */ 2036 /* replace XAD with nXAD:recorded */
2043 *xad = *nxad; 2037 *xad = *nxad;
2044 xad->flag = xflag & ~XAD_NOTRECORDED; 2038 xad->flag = xflag & ~XAD_NOTRECORDED;
2045 2039
2046 goto out; 2040 goto out;
2047 } else /* (nxlen < xlen) */ 2041 } else /* (nxlen < xlen) */
2048 goto updateLeft; 2042 goto updateLeft;
2049 #endif /* _JFS_WIP_NOCOALESCE */ 2043 #endif /* _JFS_WIP_NOCOALESCE */
2050 2044
2051 /* #ifdef _JFS_WIP_COALESCE */ 2045 /* #ifdef _JFS_WIP_COALESCE */
2052 if (xoff < nxoff) 2046 if (xoff < nxoff)
2053 goto coalesceRight; 2047 goto coalesceRight;
2054 2048
2055 /* 2049 /*
2056 * coalesce with left XAD 2050 * coalesce with left XAD
2057 */ 2051 */
2058 //coalesceLeft: /* (xoff == nxoff) */ 2052 //coalesceLeft: /* (xoff == nxoff) */
2059 /* is XAD first entry of page ? */ 2053 /* is XAD first entry of page ? */
2060 if (index == XTENTRYSTART) 2054 if (index == XTENTRYSTART)
2061 goto replace; 2055 goto replace;
2062 2056
2063 /* is nXAD logically and physically contiguous with lXAD ? */ 2057 /* is nXAD logically and physically contiguous with lXAD ? */
2064 lxad = &p->xad[index - 1]; 2058 lxad = &p->xad[index - 1];
2065 lxlen = lengthXAD(lxad); 2059 lxlen = lengthXAD(lxad);
2066 if (!(lxad->flag & XAD_NOTRECORDED) && 2060 if (!(lxad->flag & XAD_NOTRECORDED) &&
2067 (nxoff == offsetXAD(lxad) + lxlen) && 2061 (nxoff == offsetXAD(lxad) + lxlen) &&
2068 (nxaddr == addressXAD(lxad) + lxlen) && 2062 (nxaddr == addressXAD(lxad) + lxlen) &&
2069 (lxlen + nxlen < MAXXLEN)) { 2063 (lxlen + nxlen < MAXXLEN)) {
2070 /* extend right lXAD */ 2064 /* extend right lXAD */
2071 index0 = index - 1; 2065 index0 = index - 1;
2072 XADlength(lxad, lxlen + nxlen); 2066 XADlength(lxad, lxlen + nxlen);
2073 2067
2074 /* If we just merged two extents together, need to make sure the 2068 /* If we just merged two extents together, need to make sure the
2075 * right extent gets logged. If the left one is marked XAD_NEW, 2069 * right extent gets logged. If the left one is marked XAD_NEW,
2076 * then we know it will be logged. Otherwise, mark as 2070 * then we know it will be logged. Otherwise, mark as
2077 * XAD_EXTENDED 2071 * XAD_EXTENDED
2078 */ 2072 */
2079 if (!(lxad->flag & XAD_NEW)) 2073 if (!(lxad->flag & XAD_NEW))
2080 lxad->flag |= XAD_EXTENDED; 2074 lxad->flag |= XAD_EXTENDED;
2081 2075
2082 if (xlen > nxlen) { 2076 if (xlen > nxlen) {
2083 /* truncate XAD */ 2077 /* truncate XAD */
2084 XADoffset(xad, xoff + nxlen); 2078 XADoffset(xad, xoff + nxlen);
2085 XADlength(xad, xlen - nxlen); 2079 XADlength(xad, xlen - nxlen);
2086 XADaddress(xad, xaddr + nxlen); 2080 XADaddress(xad, xaddr + nxlen);
2087 goto out; 2081 goto out;
2088 } else { /* (xlen == nxlen) */ 2082 } else { /* (xlen == nxlen) */
2089 2083
2090 /* remove XAD */ 2084 /* remove XAD */
2091 if (index < nextindex - 1) 2085 if (index < nextindex - 1)
2092 memmove(&p->xad[index], &p->xad[index + 1], 2086 memmove(&p->xad[index], &p->xad[index + 1],
2093 (nextindex - index - 2087 (nextindex - index -
2094 1) << L2XTSLOTSIZE); 2088 1) << L2XTSLOTSIZE);
2095 2089
2096 p->header.nextindex = 2090 p->header.nextindex =
2097 cpu_to_le16(le16_to_cpu(p->header.nextindex) - 2091 cpu_to_le16(le16_to_cpu(p->header.nextindex) -
2098 1); 2092 1);
2099 2093
2100 index = index0; 2094 index = index0;
2101 newindex = index + 1; 2095 newindex = index + 1;
2102 nextindex = le16_to_cpu(p->header.nextindex); 2096 nextindex = le16_to_cpu(p->header.nextindex);
2103 xoff = nxoff = offsetXAD(lxad); 2097 xoff = nxoff = offsetXAD(lxad);
2104 xlen = nxlen = lxlen + nxlen; 2098 xlen = nxlen = lxlen + nxlen;
2105 xaddr = nxaddr = addressXAD(lxad); 2099 xaddr = nxaddr = addressXAD(lxad);
2106 goto coalesceRight; 2100 goto coalesceRight;
2107 } 2101 }
2108 } 2102 }
2109 2103
2110 /* 2104 /*
2111 * replace XAD with nXAD 2105 * replace XAD with nXAD
2112 */ 2106 */
2113 replace: /* (nxoff == xoff) */ 2107 replace: /* (nxoff == xoff) */
2114 if (nxlen == xlen) { 2108 if (nxlen == xlen) {
2115 /* replace XAD with nXAD:recorded */ 2109 /* replace XAD with nXAD:recorded */
2116 *xad = *nxad; 2110 *xad = *nxad;
2117 xad->flag = xflag & ~XAD_NOTRECORDED; 2111 xad->flag = xflag & ~XAD_NOTRECORDED;
2118 2112
2119 goto coalesceRight; 2113 goto coalesceRight;
2120 } else /* (nxlen < xlen) */ 2114 } else /* (nxlen < xlen) */
2121 goto updateLeft; 2115 goto updateLeft;
2122 2116
2123 /* 2117 /*
2124 * coalesce with right XAD 2118 * coalesce with right XAD
2125 */ 2119 */
2126 coalesceRight: /* (xoff <= nxoff) */ 2120 coalesceRight: /* (xoff <= nxoff) */
2127 /* is XAD last entry of page ? */ 2121 /* is XAD last entry of page ? */
2128 if (newindex == nextindex) { 2122 if (newindex == nextindex) {
2129 if (xoff == nxoff) 2123 if (xoff == nxoff)
2130 goto out; 2124 goto out;
2131 goto updateRight; 2125 goto updateRight;
2132 } 2126 }
2133 2127
2134 /* is nXAD logically and physically contiguous with rXAD ? */ 2128 /* is nXAD logically and physically contiguous with rXAD ? */
2135 rxad = &p->xad[index + 1]; 2129 rxad = &p->xad[index + 1];
2136 rxlen = lengthXAD(rxad); 2130 rxlen = lengthXAD(rxad);
2137 if (!(rxad->flag & XAD_NOTRECORDED) && 2131 if (!(rxad->flag & XAD_NOTRECORDED) &&
2138 (nxoff + nxlen == offsetXAD(rxad)) && 2132 (nxoff + nxlen == offsetXAD(rxad)) &&
2139 (nxaddr + nxlen == addressXAD(rxad)) && 2133 (nxaddr + nxlen == addressXAD(rxad)) &&
2140 (rxlen + nxlen < MAXXLEN)) { 2134 (rxlen + nxlen < MAXXLEN)) {
2141 /* extend left rXAD */ 2135 /* extend left rXAD */
2142 XADoffset(rxad, nxoff); 2136 XADoffset(rxad, nxoff);
2143 XADlength(rxad, rxlen + nxlen); 2137 XADlength(rxad, rxlen + nxlen);
2144 XADaddress(rxad, nxaddr); 2138 XADaddress(rxad, nxaddr);
2145 2139
2146 /* If we just merged two extents together, need to make sure 2140 /* If we just merged two extents together, need to make sure
2147 * the left extent gets logged. If the right one is marked 2141 * the left extent gets logged. If the right one is marked
2148 * XAD_NEW, then we know it will be logged. Otherwise, mark as 2142 * XAD_NEW, then we know it will be logged. Otherwise, mark as
2149 * XAD_EXTENDED 2143 * XAD_EXTENDED
2150 */ 2144 */
2151 if (!(rxad->flag & XAD_NEW)) 2145 if (!(rxad->flag & XAD_NEW))
2152 rxad->flag |= XAD_EXTENDED; 2146 rxad->flag |= XAD_EXTENDED;
2153 2147
2154 if (xlen > nxlen) 2148 if (xlen > nxlen)
2155 /* truncate XAD */ 2149 /* truncate XAD */
2156 XADlength(xad, xlen - nxlen); 2150 XADlength(xad, xlen - nxlen);
2157 else { /* (xlen == nxlen) */ 2151 else { /* (xlen == nxlen) */
2158 2152
2159 /* remove XAD */ 2153 /* remove XAD */
2160 memmove(&p->xad[index], &p->xad[index + 1], 2154 memmove(&p->xad[index], &p->xad[index + 1],
2161 (nextindex - index - 1) << L2XTSLOTSIZE); 2155 (nextindex - index - 1) << L2XTSLOTSIZE);
2162 2156
2163 p->header.nextindex = 2157 p->header.nextindex =
2164 cpu_to_le16(le16_to_cpu(p->header.nextindex) - 2158 cpu_to_le16(le16_to_cpu(p->header.nextindex) -
2165 1); 2159 1);
2166 } 2160 }
2167 2161
2168 goto out; 2162 goto out;
2169 } else if (xoff == nxoff) 2163 } else if (xoff == nxoff)
2170 goto out; 2164 goto out;
2171 2165
2172 if (xoff >= nxoff) { 2166 if (xoff >= nxoff) {
2173 XT_PUTPAGE(mp); 2167 XT_PUTPAGE(mp);
2174 jfs_error(ip->i_sb, "xtUpdate: xoff >= nxoff"); 2168 jfs_error(ip->i_sb, "xtUpdate: xoff >= nxoff");
2175 return -EIO; 2169 return -EIO;
2176 } 2170 }
2177 /* #endif _JFS_WIP_COALESCE */ 2171 /* #endif _JFS_WIP_COALESCE */
2178 2172
2179 /* 2173 /*
2180 * split XAD into (lXAD, nXAD): 2174 * split XAD into (lXAD, nXAD):
2181 * 2175 *
2182 * |---nXAD---> 2176 * |---nXAD--->
2183 * --|----------XAD----------|-- 2177 * --|----------XAD----------|--
2184 * |-lXAD-| 2178 * |-lXAD-|
2185 */ 2179 */
2186 updateRight: /* (xoff < nxoff) */ 2180 updateRight: /* (xoff < nxoff) */
2187 /* truncate old XAD as lXAD:not_recorded */ 2181 /* truncate old XAD as lXAD:not_recorded */
2188 xad = &p->xad[index]; 2182 xad = &p->xad[index];
2189 XADlength(xad, nxoff - xoff); 2183 XADlength(xad, nxoff - xoff);
2190 2184
2191 /* insert nXAD:recorded */ 2185 /* insert nXAD:recorded */
2192 if (nextindex == le16_to_cpu(p->header.maxentry)) { 2186 if (nextindex == le16_to_cpu(p->header.maxentry)) {
2193 2187
2194 /* xtSpliUp() unpins leaf pages */ 2188 /* xtSpliUp() unpins leaf pages */
2195 split.mp = mp; 2189 split.mp = mp;
2196 split.index = newindex; 2190 split.index = newindex;
2197 split.flag = xflag & ~XAD_NOTRECORDED; 2191 split.flag = xflag & ~XAD_NOTRECORDED;
2198 split.off = nxoff; 2192 split.off = nxoff;
2199 split.len = nxlen; 2193 split.len = nxlen;
2200 split.addr = nxaddr; 2194 split.addr = nxaddr;
2201 split.pxdlist = NULL; 2195 split.pxdlist = NULL;
2202 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) 2196 if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
2203 return rc; 2197 return rc;
2204 2198
2205 /* get back old page */ 2199 /* get back old page */
2206 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2200 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2207 if (rc) 2201 if (rc)
2208 return rc; 2202 return rc;
2209 /* 2203 /*
2210 * if leaf root has been split, original root has been 2204 * if leaf root has been split, original root has been
2211 * copied to new child page, i.e., original entry now 2205 * copied to new child page, i.e., original entry now
2212 * resides on the new child page; 2206 * resides on the new child page;
2213 */ 2207 */
2214 if (p->header.flag & BT_INTERNAL) { 2208 if (p->header.flag & BT_INTERNAL) {
2215 ASSERT(p->header.nextindex == 2209 ASSERT(p->header.nextindex ==
2216 cpu_to_le16(XTENTRYSTART + 1)); 2210 cpu_to_le16(XTENTRYSTART + 1));
2217 xad = &p->xad[XTENTRYSTART]; 2211 xad = &p->xad[XTENTRYSTART];
2218 bn = addressXAD(xad); 2212 bn = addressXAD(xad);
2219 XT_PUTPAGE(mp); 2213 XT_PUTPAGE(mp);
2220 2214
2221 /* get new child page */ 2215 /* get new child page */
2222 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2216 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2223 if (rc) 2217 if (rc)
2224 return rc; 2218 return rc;
2225 2219
2226 BT_MARK_DIRTY(mp, ip); 2220 BT_MARK_DIRTY(mp, ip);
2227 if (!test_cflag(COMMIT_Nolink, ip)) { 2221 if (!test_cflag(COMMIT_Nolink, ip)) {
2228 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); 2222 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
2229 xtlck = (struct xtlock *) & tlck->lock; 2223 xtlck = (struct xtlock *) & tlck->lock;
2230 } 2224 }
2231 } else { 2225 } else {
2232 /* is nXAD on new page ? */ 2226 /* is nXAD on new page ? */
2233 if (newindex > 2227 if (newindex >
2234 (le16_to_cpu(p->header.maxentry) >> 1)) { 2228 (le16_to_cpu(p->header.maxentry) >> 1)) {
2235 newindex = 2229 newindex =
2236 newindex - 2230 newindex -
2237 le16_to_cpu(p->header.nextindex) + 2231 le16_to_cpu(p->header.nextindex) +
2238 XTENTRYSTART; 2232 XTENTRYSTART;
2239 newpage = 1; 2233 newpage = 1;
2240 } 2234 }
2241 } 2235 }
2242 } else { 2236 } else {
2243 /* if insert into middle, shift right remaining entries */ 2237 /* if insert into middle, shift right remaining entries */
2244 if (newindex < nextindex) 2238 if (newindex < nextindex)
2245 memmove(&p->xad[newindex + 1], &p->xad[newindex], 2239 memmove(&p->xad[newindex + 1], &p->xad[newindex],
2246 (nextindex - newindex) << L2XTSLOTSIZE); 2240 (nextindex - newindex) << L2XTSLOTSIZE);
2247 2241
2248 /* insert the entry */ 2242 /* insert the entry */
2249 xad = &p->xad[newindex]; 2243 xad = &p->xad[newindex];
2250 *xad = *nxad; 2244 *xad = *nxad;
2251 xad->flag = xflag & ~XAD_NOTRECORDED; 2245 xad->flag = xflag & ~XAD_NOTRECORDED;
2252 2246
2253 /* advance next available entry index. */ 2247 /* advance next available entry index. */
2254 p->header.nextindex = 2248 p->header.nextindex =
2255 cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); 2249 cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
2256 } 2250 }
2257 2251
2258 /* 2252 /*
2259 * does nXAD force 3-way split ? 2253 * does nXAD force 3-way split ?
2260 * 2254 *
2261 * |---nXAD--->| 2255 * |---nXAD--->|
2262 * --|----------XAD-------------|-- 2256 * --|----------XAD-------------|--
2263 * |-lXAD-| |-rXAD -| 2257 * |-lXAD-| |-rXAD -|
2264 */ 2258 */
2265 if (nxoff + nxlen == xoff + xlen) 2259 if (nxoff + nxlen == xoff + xlen)
2266 goto out; 2260 goto out;
2267 2261
2268 /* reorient nXAD as XAD for further split XAD into (nXAD, rXAD) */ 2262 /* reorient nXAD as XAD for further split XAD into (nXAD, rXAD) */
2269 if (newpage) { 2263 if (newpage) {
2270 /* close out old page */ 2264 /* close out old page */
2271 if (!test_cflag(COMMIT_Nolink, ip)) { 2265 if (!test_cflag(COMMIT_Nolink, ip)) {
2272 xtlck->lwm.offset = (xtlck->lwm.offset) ? 2266 xtlck->lwm.offset = (xtlck->lwm.offset) ?
2273 min(index0, (int)xtlck->lwm.offset) : index0; 2267 min(index0, (int)xtlck->lwm.offset) : index0;
2274 xtlck->lwm.length = 2268 xtlck->lwm.length =
2275 le16_to_cpu(p->header.nextindex) - 2269 le16_to_cpu(p->header.nextindex) -
2276 xtlck->lwm.offset; 2270 xtlck->lwm.offset;
2277 } 2271 }
2278 2272
2279 bn = le64_to_cpu(p->header.next); 2273 bn = le64_to_cpu(p->header.next);
2280 XT_PUTPAGE(mp); 2274 XT_PUTPAGE(mp);
2281 2275
2282 /* get new right page */ 2276 /* get new right page */
2283 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2277 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2284 if (rc) 2278 if (rc)
2285 return rc; 2279 return rc;
2286 2280
2287 BT_MARK_DIRTY(mp, ip); 2281 BT_MARK_DIRTY(mp, ip);
2288 if (!test_cflag(COMMIT_Nolink, ip)) { 2282 if (!test_cflag(COMMIT_Nolink, ip)) {
2289 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 2283 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
2290 xtlck = (struct xtlock *) & tlck->lock; 2284 xtlck = (struct xtlock *) & tlck->lock;
2291 } 2285 }
2292 2286
2293 index0 = index = newindex; 2287 index0 = index = newindex;
2294 } else 2288 } else
2295 index++; 2289 index++;
2296 2290
2297 newindex = index + 1; 2291 newindex = index + 1;
2298 nextindex = le16_to_cpu(p->header.nextindex); 2292 nextindex = le16_to_cpu(p->header.nextindex);
2299 xlen = xlen - (nxoff - xoff); 2293 xlen = xlen - (nxoff - xoff);
2300 xoff = nxoff; 2294 xoff = nxoff;
2301 xaddr = nxaddr; 2295 xaddr = nxaddr;
2302 2296
2303 /* recompute split pages */ 2297 /* recompute split pages */
2304 if (nextindex == le16_to_cpu(p->header.maxentry)) { 2298 if (nextindex == le16_to_cpu(p->header.maxentry)) {
2305 XT_PUTPAGE(mp); 2299 XT_PUTPAGE(mp);
2306 2300
2307 if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT))) 2301 if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT)))
2308 return rc; 2302 return rc;
2309 2303
2310 /* retrieve search result */ 2304 /* retrieve search result */
2311 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0); 2305 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0);
2312 2306
2313 if (cmp != 0) { 2307 if (cmp != 0) {
2314 XT_PUTPAGE(mp); 2308 XT_PUTPAGE(mp);
2315 jfs_error(ip->i_sb, "xtUpdate: xtSearch failed"); 2309 jfs_error(ip->i_sb, "xtUpdate: xtSearch failed");
2316 return -EIO; 2310 return -EIO;
2317 } 2311 }
2318 2312
2319 if (index0 != index) { 2313 if (index0 != index) {
2320 XT_PUTPAGE(mp); 2314 XT_PUTPAGE(mp);
2321 jfs_error(ip->i_sb, 2315 jfs_error(ip->i_sb,
2322 "xtUpdate: unexpected value of index"); 2316 "xtUpdate: unexpected value of index");
2323 return -EIO; 2317 return -EIO;
2324 } 2318 }
2325 } 2319 }
2326 2320
2327 /* 2321 /*
2328 * split XAD into (nXAD, rXAD) 2322 * split XAD into (nXAD, rXAD)
2329 * 2323 *
2330 * ---nXAD---| 2324 * ---nXAD---|
2331 * --|----------XAD----------|-- 2325 * --|----------XAD----------|--
2332 * |-rXAD-| 2326 * |-rXAD-|
2333 */ 2327 */
2334 updateLeft: /* (nxoff == xoff) && (nxlen < xlen) */ 2328 updateLeft: /* (nxoff == xoff) && (nxlen < xlen) */
2335 /* update old XAD with nXAD:recorded */ 2329 /* update old XAD with nXAD:recorded */
2336 xad = &p->xad[index]; 2330 xad = &p->xad[index];
2337 *xad = *nxad; 2331 *xad = *nxad;
2338 xad->flag = xflag & ~XAD_NOTRECORDED; 2332 xad->flag = xflag & ~XAD_NOTRECORDED;
2339 2333
2340 /* insert rXAD:not_recorded */ 2334 /* insert rXAD:not_recorded */
2341 xoff = xoff + nxlen; 2335 xoff = xoff + nxlen;
2342 xlen = xlen - nxlen; 2336 xlen = xlen - nxlen;
2343 xaddr = xaddr + nxlen; 2337 xaddr = xaddr + nxlen;
2344 if (nextindex == le16_to_cpu(p->header.maxentry)) { 2338 if (nextindex == le16_to_cpu(p->header.maxentry)) {
2345 /* 2339 /*
2346 printf("xtUpdate.updateLeft.split p:0x%p\n", p); 2340 printf("xtUpdate.updateLeft.split p:0x%p\n", p);
2347 */ 2341 */
2348 /* xtSpliUp() unpins leaf pages */ 2342 /* xtSpliUp() unpins leaf pages */
2349 split.mp = mp; 2343 split.mp = mp;
2350 split.index = newindex; 2344 split.index = newindex;
2351 split.flag = xflag; 2345 split.flag = xflag;
2352 split.off = xoff; 2346 split.off = xoff;
2353 split.len = xlen; 2347 split.len = xlen;
2354 split.addr = xaddr; 2348 split.addr = xaddr;
2355 split.pxdlist = NULL; 2349 split.pxdlist = NULL;
2356 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) 2350 if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
2357 return rc; 2351 return rc;
2358 2352
2359 /* get back old page */ 2353 /* get back old page */
2360 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2354 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2361 if (rc) 2355 if (rc)
2362 return rc; 2356 return rc;
2363 2357
2364 /* 2358 /*
2365 * if leaf root has been split, original root has been 2359 * if leaf root has been split, original root has been
2366 * copied to new child page, i.e., original entry now 2360 * copied to new child page, i.e., original entry now
2367 * resides on the new child page; 2361 * resides on the new child page;
2368 */ 2362 */
2369 if (p->header.flag & BT_INTERNAL) { 2363 if (p->header.flag & BT_INTERNAL) {
2370 ASSERT(p->header.nextindex == 2364 ASSERT(p->header.nextindex ==
2371 cpu_to_le16(XTENTRYSTART + 1)); 2365 cpu_to_le16(XTENTRYSTART + 1));
2372 xad = &p->xad[XTENTRYSTART]; 2366 xad = &p->xad[XTENTRYSTART];
2373 bn = addressXAD(xad); 2367 bn = addressXAD(xad);
2374 XT_PUTPAGE(mp); 2368 XT_PUTPAGE(mp);
2375 2369
2376 /* get new child page */ 2370 /* get new child page */
2377 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2371 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2378 if (rc) 2372 if (rc)
2379 return rc; 2373 return rc;
2380 2374
2381 BT_MARK_DIRTY(mp, ip); 2375 BT_MARK_DIRTY(mp, ip);
2382 if (!test_cflag(COMMIT_Nolink, ip)) { 2376 if (!test_cflag(COMMIT_Nolink, ip)) {
2383 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); 2377 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
2384 xtlck = (struct xtlock *) & tlck->lock; 2378 xtlck = (struct xtlock *) & tlck->lock;
2385 } 2379 }
2386 } 2380 }
2387 } else { 2381 } else {
2388 /* if insert into middle, shift right remaining entries */ 2382 /* if insert into middle, shift right remaining entries */
2389 if (newindex < nextindex) 2383 if (newindex < nextindex)
2390 memmove(&p->xad[newindex + 1], &p->xad[newindex], 2384 memmove(&p->xad[newindex + 1], &p->xad[newindex],
2391 (nextindex - newindex) << L2XTSLOTSIZE); 2385 (nextindex - newindex) << L2XTSLOTSIZE);
2392 2386
2393 /* insert the entry */ 2387 /* insert the entry */
2394 xad = &p->xad[newindex]; 2388 xad = &p->xad[newindex];
2395 XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); 2389 XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
2396 2390
2397 /* advance next available entry index. */ 2391 /* advance next available entry index. */
2398 p->header.nextindex = 2392 p->header.nextindex =
2399 cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); 2393 cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
2400 } 2394 }
2401 2395
2402 out: 2396 out:
2403 if (!test_cflag(COMMIT_Nolink, ip)) { 2397 if (!test_cflag(COMMIT_Nolink, ip)) {
2404 xtlck->lwm.offset = (xtlck->lwm.offset) ? 2398 xtlck->lwm.offset = (xtlck->lwm.offset) ?
2405 min(index0, (int)xtlck->lwm.offset) : index0; 2399 min(index0, (int)xtlck->lwm.offset) : index0;
2406 xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - 2400 xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
2407 xtlck->lwm.offset; 2401 xtlck->lwm.offset;
2408 } 2402 }
2409 2403
2410 /* unpin the leaf page */ 2404 /* unpin the leaf page */
2411 XT_PUTPAGE(mp); 2405 XT_PUTPAGE(mp);
2412 2406
2413 return rc; 2407 return rc;
2414 } 2408 }
2415 2409
2416 2410
2417 /* 2411 /*
2418 * xtAppend() 2412 * xtAppend()
2419 * 2413 *
2420 * function: grow in append mode from contiguous region specified ; 2414 * function: grow in append mode from contiguous region specified ;
2421 * 2415 *
2422 * parameter: 2416 * parameter:
2423 * tid - transaction id; 2417 * tid - transaction id;
2424 * ip - file object; 2418 * ip - file object;
2425 * xflag - extent flag: 2419 * xflag - extent flag:
2426 * xoff - extent offset; 2420 * xoff - extent offset;
2427 * maxblocks - max extent length; 2421 * maxblocks - max extent length;
2428 * xlen - extent length (in/out); 2422 * xlen - extent length (in/out);
2429 * xaddrp - extent address pointer (in/out): 2423 * xaddrp - extent address pointer (in/out):
2430 * flag - 2424 * flag -
2431 * 2425 *
2432 * return: 2426 * return:
2433 */ 2427 */
2434 int xtAppend(tid_t tid, /* transaction id */ 2428 int xtAppend(tid_t tid, /* transaction id */
2435 struct inode *ip, int xflag, s64 xoff, s32 maxblocks, 2429 struct inode *ip, int xflag, s64 xoff, s32 maxblocks,
2436 s32 * xlenp, /* (in/out) */ 2430 s32 * xlenp, /* (in/out) */
2437 s64 * xaddrp, /* (in/out) */ 2431 s64 * xaddrp, /* (in/out) */
2438 int flag) 2432 int flag)
2439 { 2433 {
2440 int rc = 0; 2434 int rc = 0;
2441 struct metapage *mp; /* meta-page buffer */ 2435 struct metapage *mp; /* meta-page buffer */
2442 xtpage_t *p; /* base B+-tree index page */ 2436 xtpage_t *p; /* base B+-tree index page */
2443 s64 bn, xaddr; 2437 s64 bn, xaddr;
2444 int index, nextindex; 2438 int index, nextindex;
2445 struct btstack btstack; /* traverse stack */ 2439 struct btstack btstack; /* traverse stack */
2446 struct xtsplit split; /* split information */ 2440 struct xtsplit split; /* split information */
2447 xad_t *xad; 2441 xad_t *xad;
2448 int cmp; 2442 int cmp;
2449 struct tlock *tlck; 2443 struct tlock *tlck;
2450 struct xtlock *xtlck; 2444 struct xtlock *xtlck;
2451 int nsplit, nblocks, xlen; 2445 int nsplit, nblocks, xlen;
2452 struct pxdlist pxdlist; 2446 struct pxdlist pxdlist;
2453 pxd_t *pxd; 2447 pxd_t *pxd;
2454 s64 next; 2448 s64 next;
2455 2449
2456 xaddr = *xaddrp; 2450 xaddr = *xaddrp;
2457 xlen = *xlenp; 2451 xlen = *xlenp;
2458 jfs_info("xtAppend: xoff:0x%lx maxblocks:%d xlen:%d xaddr:0x%lx", 2452 jfs_info("xtAppend: xoff:0x%lx maxblocks:%d xlen:%d xaddr:0x%lx",
2459 (ulong) xoff, maxblocks, xlen, (ulong) xaddr); 2453 (ulong) xoff, maxblocks, xlen, (ulong) xaddr);
2460 2454
2461 /* 2455 /*
2462 * search for the entry location at which to insert: 2456 * search for the entry location at which to insert:
2463 * 2457 *
2464 * xtFastSearch() and xtSearch() both returns (leaf page 2458 * xtFastSearch() and xtSearch() both returns (leaf page
2465 * pinned, index at which to insert). 2459 * pinned, index at which to insert).
2466 * n.b. xtSearch() may return index of maxentry of 2460 * n.b. xtSearch() may return index of maxentry of
2467 * the full page. 2461 * the full page.
2468 */ 2462 */
2469 if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT))) 2463 if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
2470 return rc; 2464 return rc;
2471 2465
2472 /* retrieve search result */ 2466 /* retrieve search result */
2473 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 2467 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
2474 2468
2475 if (cmp == 0) { 2469 if (cmp == 0) {
2476 rc = -EEXIST; 2470 rc = -EEXIST;
2477 goto out; 2471 goto out;
2478 } 2472 }
2479 2473
2480 if (next) 2474 if (next)
2481 xlen = min(xlen, (int)(next - xoff)); 2475 xlen = min(xlen, (int)(next - xoff));
2482 //insert: 2476 //insert:
2483 /* 2477 /*
2484 * insert entry for new extent 2478 * insert entry for new extent
2485 */ 2479 */
2486 xflag |= XAD_NEW; 2480 xflag |= XAD_NEW;
2487 2481
2488 /* 2482 /*
2489 * if the leaf page is full, split the page and 2483 * if the leaf page is full, split the page and
2490 * propagate up the router entry for the new page from split 2484 * propagate up the router entry for the new page from split
2491 * 2485 *
2492 * The xtSplitUp() will insert the entry and unpin the leaf page. 2486 * The xtSplitUp() will insert the entry and unpin the leaf page.
2493 */ 2487 */
2494 nextindex = le16_to_cpu(p->header.nextindex); 2488 nextindex = le16_to_cpu(p->header.nextindex);
2495 if (nextindex < le16_to_cpu(p->header.maxentry)) 2489 if (nextindex < le16_to_cpu(p->header.maxentry))
2496 goto insertLeaf; 2490 goto insertLeaf;
2497 2491
2498 /* 2492 /*
2499 * allocate new index blocks to cover index page split(s) 2493 * allocate new index blocks to cover index page split(s)
2500 */ 2494 */
2501 nsplit = btstack.nsplit; 2495 nsplit = btstack.nsplit;
2502 split.pxdlist = &pxdlist; 2496 split.pxdlist = &pxdlist;
2503 pxdlist.maxnpxd = pxdlist.npxd = 0; 2497 pxdlist.maxnpxd = pxdlist.npxd = 0;
2504 pxd = &pxdlist.pxd[0]; 2498 pxd = &pxdlist.pxd[0];
2505 nblocks = JFS_SBI(ip->i_sb)->nbperpage; 2499 nblocks = JFS_SBI(ip->i_sb)->nbperpage;
2506 for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) { 2500 for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) {
2507 if ((rc = dbAllocBottomUp(ip, xaddr, (s64) nblocks)) == 0) { 2501 if ((rc = dbAllocBottomUp(ip, xaddr, (s64) nblocks)) == 0) {
2508 PXDaddress(pxd, xaddr); 2502 PXDaddress(pxd, xaddr);
2509 PXDlength(pxd, nblocks); 2503 PXDlength(pxd, nblocks);
2510 2504
2511 pxdlist.maxnpxd++; 2505 pxdlist.maxnpxd++;
2512 2506
2513 continue; 2507 continue;
2514 } 2508 }
2515 2509
2516 /* undo allocation */ 2510 /* undo allocation */
2517 2511
2518 goto out; 2512 goto out;
2519 } 2513 }
2520 2514
2521 xlen = min(xlen, maxblocks); 2515 xlen = min(xlen, maxblocks);
2522 2516
2523 /* 2517 /*
2524 * allocate data extent requested 2518 * allocate data extent requested
2525 */ 2519 */
2526 if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen))) 2520 if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen)))
2527 goto out; 2521 goto out;
2528 2522
2529 split.mp = mp; 2523 split.mp = mp;
2530 split.index = index; 2524 split.index = index;
2531 split.flag = xflag; 2525 split.flag = xflag;
2532 split.off = xoff; 2526 split.off = xoff;
2533 split.len = xlen; 2527 split.len = xlen;
2534 split.addr = xaddr; 2528 split.addr = xaddr;
2535 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) { 2529 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
2536 /* undo data extent allocation */ 2530 /* undo data extent allocation */
2537 dbFree(ip, *xaddrp, (s64) * xlenp); 2531 dbFree(ip, *xaddrp, (s64) * xlenp);
2538 2532
2539 return rc; 2533 return rc;
2540 } 2534 }
2541 2535
2542 *xaddrp = xaddr; 2536 *xaddrp = xaddr;
2543 *xlenp = xlen; 2537 *xlenp = xlen;
2544 return 0; 2538 return 0;
2545 2539
2546 /* 2540 /*
2547 * insert the new entry into the leaf page 2541 * insert the new entry into the leaf page
2548 */ 2542 */
2549 insertLeaf: 2543 insertLeaf:
2550 /* 2544 /*
2551 * allocate data extent requested 2545 * allocate data extent requested
2552 */ 2546 */
2553 if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen))) 2547 if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen)))
2554 goto out; 2548 goto out;
2555 2549
2556 BT_MARK_DIRTY(mp, ip); 2550 BT_MARK_DIRTY(mp, ip);
2557 /* 2551 /*
2558 * acquire a transaction lock on the leaf page; 2552 * acquire a transaction lock on the leaf page;
2559 * 2553 *
2560 * action: xad insertion/extension; 2554 * action: xad insertion/extension;
2561 */ 2555 */
2562 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 2556 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
2563 xtlck = (struct xtlock *) & tlck->lock; 2557 xtlck = (struct xtlock *) & tlck->lock;
2564 2558
2565 /* insert the new entry: mark the entry NEW */ 2559 /* insert the new entry: mark the entry NEW */
2566 xad = &p->xad[index]; 2560 xad = &p->xad[index];
2567 XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); 2561 XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
2568 2562
2569 /* advance next available entry index */ 2563 /* advance next available entry index */
2570 p->header.nextindex = 2564 le16_add_cpu(&p->header.nextindex, 1);
2571 cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
2572 2565
2573 xtlck->lwm.offset = 2566 xtlck->lwm.offset =
2574 (xtlck->lwm.offset) ? min(index,(int) xtlck->lwm.offset) : index; 2567 (xtlck->lwm.offset) ? min(index,(int) xtlck->lwm.offset) : index;
2575 xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - 2568 xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
2576 xtlck->lwm.offset; 2569 xtlck->lwm.offset;
2577 2570
2578 *xaddrp = xaddr; 2571 *xaddrp = xaddr;
2579 *xlenp = xlen; 2572 *xlenp = xlen;
2580 2573
2581 out: 2574 out:
2582 /* unpin the leaf page */ 2575 /* unpin the leaf page */
2583 XT_PUTPAGE(mp); 2576 XT_PUTPAGE(mp);
2584 2577
2585 return rc; 2578 return rc;
2586 } 2579 }
2587 #ifdef _STILL_TO_PORT 2580 #ifdef _STILL_TO_PORT
2588 2581
2589 /* - TBD for defragmentaion/reorganization - 2582 /* - TBD for defragmentaion/reorganization -
2590 * 2583 *
2591 * xtDelete() 2584 * xtDelete()
2592 * 2585 *
2593 * function: 2586 * function:
2594 * delete the entry with the specified key. 2587 * delete the entry with the specified key.
2595 * 2588 *
2596 * N.B.: whole extent of the entry is assumed to be deleted. 2589 * N.B.: whole extent of the entry is assumed to be deleted.
2597 * 2590 *
2598 * parameter: 2591 * parameter:
2599 * 2592 *
2600 * return: 2593 * return:
2601 * ENOENT: if the entry is not found. 2594 * ENOENT: if the entry is not found.
2602 * 2595 *
2603 * exception: 2596 * exception:
2604 */ 2597 */
2605 int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag) 2598 int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag)
2606 { 2599 {
2607 int rc = 0; 2600 int rc = 0;
2608 struct btstack btstack; 2601 struct btstack btstack;
2609 int cmp; 2602 int cmp;
2610 s64 bn; 2603 s64 bn;
2611 struct metapage *mp; 2604 struct metapage *mp;
2612 xtpage_t *p; 2605 xtpage_t *p;
2613 int index, nextindex; 2606 int index, nextindex;
2614 struct tlock *tlck; 2607 struct tlock *tlck;
2615 struct xtlock *xtlck; 2608 struct xtlock *xtlck;
2616 2609
2617 /* 2610 /*
2618 * find the matching entry; xtSearch() pins the page 2611 * find the matching entry; xtSearch() pins the page
2619 */ 2612 */
2620 if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0))) 2613 if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0)))
2621 return rc; 2614 return rc;
2622 2615
2623 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 2616 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
2624 if (cmp) { 2617 if (cmp) {
2625 /* unpin the leaf page */ 2618 /* unpin the leaf page */
2626 XT_PUTPAGE(mp); 2619 XT_PUTPAGE(mp);
2627 return -ENOENT; 2620 return -ENOENT;
2628 } 2621 }
2629 2622
2630 /* 2623 /*
2631 * delete the entry from the leaf page 2624 * delete the entry from the leaf page
2632 */ 2625 */
2633 nextindex = le16_to_cpu(p->header.nextindex); 2626 nextindex = le16_to_cpu(p->header.nextindex);
2634 p->header.nextindex = 2627 le16_add_cpu(&p->header.nextindex, -1);
2635 cpu_to_le16(le16_to_cpu(p->header.nextindex) - 1);
2636 2628
2637 /* 2629 /*
2638 * if the leaf page bocome empty, free the page 2630 * if the leaf page bocome empty, free the page
2639 */ 2631 */
2640 if (p->header.nextindex == cpu_to_le16(XTENTRYSTART)) 2632 if (p->header.nextindex == cpu_to_le16(XTENTRYSTART))
2641 return (xtDeleteUp(tid, ip, mp, p, &btstack)); 2633 return (xtDeleteUp(tid, ip, mp, p, &btstack));
2642 2634
2643 BT_MARK_DIRTY(mp, ip); 2635 BT_MARK_DIRTY(mp, ip);
2644 /* 2636 /*
2645 * acquire a transaction lock on the leaf page; 2637 * acquire a transaction lock on the leaf page;
2646 * 2638 *
2647 * action:xad deletion; 2639 * action:xad deletion;
2648 */ 2640 */
2649 tlck = txLock(tid, ip, mp, tlckXTREE); 2641 tlck = txLock(tid, ip, mp, tlckXTREE);
2650 xtlck = (struct xtlock *) & tlck->lock; 2642 xtlck = (struct xtlock *) & tlck->lock;
2651 xtlck->lwm.offset = 2643 xtlck->lwm.offset =
2652 (xtlck->lwm.offset) ? min(index, xtlck->lwm.offset) : index; 2644 (xtlck->lwm.offset) ? min(index, xtlck->lwm.offset) : index;
2653 2645
2654 /* if delete from middle, shift left/compact the remaining entries */ 2646 /* if delete from middle, shift left/compact the remaining entries */
2655 if (index < nextindex - 1) 2647 if (index < nextindex - 1)
2656 memmove(&p->xad[index], &p->xad[index + 1], 2648 memmove(&p->xad[index], &p->xad[index + 1],
2657 (nextindex - index - 1) * sizeof(xad_t)); 2649 (nextindex - index - 1) * sizeof(xad_t));
2658 2650
2659 XT_PUTPAGE(mp); 2651 XT_PUTPAGE(mp);
2660 2652
2661 return 0; 2653 return 0;
2662 } 2654 }
2663 2655
2664 2656
2665 /* - TBD for defragmentaion/reorganization - 2657 /* - TBD for defragmentaion/reorganization -
2666 * 2658 *
2667 * xtDeleteUp() 2659 * xtDeleteUp()
2668 * 2660 *
2669 * function: 2661 * function:
2670 * free empty pages as propagating deletion up the tree 2662 * free empty pages as propagating deletion up the tree
2671 * 2663 *
2672 * parameter: 2664 * parameter:
2673 * 2665 *
2674 * return: 2666 * return:
2675 */ 2667 */
2676 static int 2668 static int
2677 xtDeleteUp(tid_t tid, struct inode *ip, 2669 xtDeleteUp(tid_t tid, struct inode *ip,
2678 struct metapage * fmp, xtpage_t * fp, struct btstack * btstack) 2670 struct metapage * fmp, xtpage_t * fp, struct btstack * btstack)
2679 { 2671 {
2680 int rc = 0; 2672 int rc = 0;
2681 struct metapage *mp; 2673 struct metapage *mp;
2682 xtpage_t *p; 2674 xtpage_t *p;
2683 int index, nextindex; 2675 int index, nextindex;
2684 s64 xaddr; 2676 s64 xaddr;
2685 int xlen; 2677 int xlen;
2686 struct btframe *parent; 2678 struct btframe *parent;
2687 struct tlock *tlck; 2679 struct tlock *tlck;
2688 struct xtlock *xtlck; 2680 struct xtlock *xtlck;
2689 2681
2690 /* 2682 /*
2691 * keep root leaf page which has become empty 2683 * keep root leaf page which has become empty
2692 */ 2684 */
2693 if (fp->header.flag & BT_ROOT) { 2685 if (fp->header.flag & BT_ROOT) {
2694 /* keep the root page */ 2686 /* keep the root page */
2695 fp->header.flag &= ~BT_INTERNAL; 2687 fp->header.flag &= ~BT_INTERNAL;
2696 fp->header.flag |= BT_LEAF; 2688 fp->header.flag |= BT_LEAF;
2697 fp->header.nextindex = cpu_to_le16(XTENTRYSTART); 2689 fp->header.nextindex = cpu_to_le16(XTENTRYSTART);
2698 2690
2699 /* XT_PUTPAGE(fmp); */ 2691 /* XT_PUTPAGE(fmp); */
2700 2692
2701 return 0; 2693 return 0;
2702 } 2694 }
2703 2695
2704 /* 2696 /*
2705 * free non-root leaf page 2697 * free non-root leaf page
2706 */ 2698 */
2707 if ((rc = xtRelink(tid, ip, fp))) { 2699 if ((rc = xtRelink(tid, ip, fp))) {
2708 XT_PUTPAGE(fmp); 2700 XT_PUTPAGE(fmp);
2709 return rc; 2701 return rc;
2710 } 2702 }
2711 2703
2712 xaddr = addressPXD(&fp->header.self); 2704 xaddr = addressPXD(&fp->header.self);
2713 xlen = lengthPXD(&fp->header.self); 2705 xlen = lengthPXD(&fp->header.self);
2714 /* free the page extent */ 2706 /* free the page extent */
2715 dbFree(ip, xaddr, (s64) xlen); 2707 dbFree(ip, xaddr, (s64) xlen);
2716 2708
2717 /* free the buffer page */ 2709 /* free the buffer page */
2718 discard_metapage(fmp); 2710 discard_metapage(fmp);
2719 2711
2720 /* 2712 /*
2721 * propagate page deletion up the index tree 2713 * propagate page deletion up the index tree
2722 * 2714 *
2723 * If the delete from the parent page makes it empty, 2715 * If the delete from the parent page makes it empty,
2724 * continue all the way up the tree. 2716 * continue all the way up the tree.
2725 * stop if the root page is reached (which is never deleted) or 2717 * stop if the root page is reached (which is never deleted) or
2726 * if the entry deletion does not empty the page. 2718 * if the entry deletion does not empty the page.
2727 */ 2719 */
2728 while ((parent = BT_POP(btstack)) != NULL) { 2720 while ((parent = BT_POP(btstack)) != NULL) {
2729 /* get/pin the parent page <sp> */ 2721 /* get/pin the parent page <sp> */
2730 XT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc); 2722 XT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc);
2731 if (rc) 2723 if (rc)
2732 return rc; 2724 return rc;
2733 2725
2734 index = parent->index; 2726 index = parent->index;
2735 2727
2736 /* delete the entry for the freed child page from parent. 2728 /* delete the entry for the freed child page from parent.
2737 */ 2729 */
2738 nextindex = le16_to_cpu(p->header.nextindex); 2730 nextindex = le16_to_cpu(p->header.nextindex);
2739 2731
2740 /* 2732 /*
2741 * the parent has the single entry being deleted: 2733 * the parent has the single entry being deleted:
2742 * free the parent page which has become empty. 2734 * free the parent page which has become empty.
2743 */ 2735 */
2744 if (nextindex == 1) { 2736 if (nextindex == 1) {
2745 if (p->header.flag & BT_ROOT) { 2737 if (p->header.flag & BT_ROOT) {
2746 /* keep the root page */ 2738 /* keep the root page */
2747 p->header.flag &= ~BT_INTERNAL; 2739 p->header.flag &= ~BT_INTERNAL;
2748 p->header.flag |= BT_LEAF; 2740 p->header.flag |= BT_LEAF;
2749 p->header.nextindex = 2741 p->header.nextindex =
2750 cpu_to_le16(XTENTRYSTART); 2742 cpu_to_le16(XTENTRYSTART);
2751 2743
2752 /* XT_PUTPAGE(mp); */ 2744 /* XT_PUTPAGE(mp); */
2753 2745
2754 break; 2746 break;
2755 } else { 2747 } else {
2756 /* free the parent page */ 2748 /* free the parent page */
2757 if ((rc = xtRelink(tid, ip, p))) 2749 if ((rc = xtRelink(tid, ip, p)))
2758 return rc; 2750 return rc;
2759 2751
2760 xaddr = addressPXD(&p->header.self); 2752 xaddr = addressPXD(&p->header.self);
2761 /* free the page extent */ 2753 /* free the page extent */
2762 dbFree(ip, xaddr, 2754 dbFree(ip, xaddr,
2763 (s64) JFS_SBI(ip->i_sb)->nbperpage); 2755 (s64) JFS_SBI(ip->i_sb)->nbperpage);
2764 2756
2765 /* unpin/free the buffer page */ 2757 /* unpin/free the buffer page */
2766 discard_metapage(mp); 2758 discard_metapage(mp);
2767 2759
2768 /* propagate up */ 2760 /* propagate up */
2769 continue; 2761 continue;
2770 } 2762 }
2771 } 2763 }
2772 /* 2764 /*
2773 * the parent has other entries remaining: 2765 * the parent has other entries remaining:
2774 * delete the router entry from the parent page. 2766 * delete the router entry from the parent page.
2775 */ 2767 */
2776 else { 2768 else {
2777 BT_MARK_DIRTY(mp, ip); 2769 BT_MARK_DIRTY(mp, ip);
2778 /* 2770 /*
2779 * acquire a transaction lock on the leaf page; 2771 * acquire a transaction lock on the leaf page;
2780 * 2772 *
2781 * action:xad deletion; 2773 * action:xad deletion;
2782 */ 2774 */
2783 tlck = txLock(tid, ip, mp, tlckXTREE); 2775 tlck = txLock(tid, ip, mp, tlckXTREE);
2784 xtlck = (struct xtlock *) & tlck->lock; 2776 xtlck = (struct xtlock *) & tlck->lock;
2785 xtlck->lwm.offset = 2777 xtlck->lwm.offset =
2786 (xtlck->lwm.offset) ? min(index, 2778 (xtlck->lwm.offset) ? min(index,
2787 xtlck->lwm. 2779 xtlck->lwm.
2788 offset) : index; 2780 offset) : index;
2789 2781
2790 /* if delete from middle, 2782 /* if delete from middle,
2791 * shift left/compact the remaining entries in the page 2783 * shift left/compact the remaining entries in the page
2792 */ 2784 */
2793 if (index < nextindex - 1) 2785 if (index < nextindex - 1)
2794 memmove(&p->xad[index], &p->xad[index + 1], 2786 memmove(&p->xad[index], &p->xad[index + 1],
2795 (nextindex - index - 2787 (nextindex - index -
2796 1) << L2XTSLOTSIZE); 2788 1) << L2XTSLOTSIZE);
2797 2789
2798 p->header.nextindex = 2790 le16_add_cpu(&p->header.nextindex, -1);
2799 cpu_to_le16(le16_to_cpu(p->header.nextindex) -
2800 1);
2801 jfs_info("xtDeleteUp(entry): 0x%lx[%d]", 2791 jfs_info("xtDeleteUp(entry): 0x%lx[%d]",
2802 (ulong) parent->bn, index); 2792 (ulong) parent->bn, index);
2803 } 2793 }
2804 2794
2805 /* unpin the parent page */ 2795 /* unpin the parent page */
2806 XT_PUTPAGE(mp); 2796 XT_PUTPAGE(mp);
2807 2797
2808 /* exit propagation up */ 2798 /* exit propagation up */
2809 break; 2799 break;
2810 } 2800 }
2811 2801
2812 return 0; 2802 return 0;
2813 } 2803 }
2814 2804
2815 2805
2816 /* 2806 /*
2817 * NAME: xtRelocate() 2807 * NAME: xtRelocate()
2818 * 2808 *
2819 * FUNCTION: relocate xtpage or data extent of regular file; 2809 * FUNCTION: relocate xtpage or data extent of regular file;
2820 * This function is mainly used by defragfs utility. 2810 * This function is mainly used by defragfs utility.
2821 * 2811 *
2822 * NOTE: This routine does not have the logic to handle 2812 * NOTE: This routine does not have the logic to handle
2823 * uncommitted allocated extent. The caller should call 2813 * uncommitted allocated extent. The caller should call
2824 * txCommit() to commit all the allocation before call 2814 * txCommit() to commit all the allocation before call
2825 * this routine. 2815 * this routine.
2826 */ 2816 */
2827 int 2817 int
2828 xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ 2818 xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */
2829 s64 nxaddr, /* new xaddr */ 2819 s64 nxaddr, /* new xaddr */
2830 int xtype) 2820 int xtype)
2831 { /* extent type: XTPAGE or DATAEXT */ 2821 { /* extent type: XTPAGE or DATAEXT */
2832 int rc = 0; 2822 int rc = 0;
2833 struct tblock *tblk; 2823 struct tblock *tblk;
2834 struct tlock *tlck; 2824 struct tlock *tlck;
2835 struct xtlock *xtlck; 2825 struct xtlock *xtlck;
2836 struct metapage *mp, *pmp, *lmp, *rmp; /* meta-page buffer */ 2826 struct metapage *mp, *pmp, *lmp, *rmp; /* meta-page buffer */
2837 xtpage_t *p, *pp, *rp, *lp; /* base B+-tree index page */ 2827 xtpage_t *p, *pp, *rp, *lp; /* base B+-tree index page */
2838 xad_t *xad; 2828 xad_t *xad;
2839 pxd_t *pxd; 2829 pxd_t *pxd;
2840 s64 xoff, xsize; 2830 s64 xoff, xsize;
2841 int xlen; 2831 int xlen;
2842 s64 oxaddr, sxaddr, dxaddr, nextbn, prevbn; 2832 s64 oxaddr, sxaddr, dxaddr, nextbn, prevbn;
2843 cbuf_t *cp; 2833 cbuf_t *cp;
2844 s64 offset, nbytes, nbrd, pno; 2834 s64 offset, nbytes, nbrd, pno;
2845 int nb, npages, nblks; 2835 int nb, npages, nblks;
2846 s64 bn; 2836 s64 bn;
2847 int cmp; 2837 int cmp;
2848 int index; 2838 int index;
2849 struct pxd_lock *pxdlock; 2839 struct pxd_lock *pxdlock;
2850 struct btstack btstack; /* traverse stack */ 2840 struct btstack btstack; /* traverse stack */
2851 2841
2852 xtype = xtype & EXTENT_TYPE; 2842 xtype = xtype & EXTENT_TYPE;
2853 2843
2854 xoff = offsetXAD(oxad); 2844 xoff = offsetXAD(oxad);
2855 oxaddr = addressXAD(oxad); 2845 oxaddr = addressXAD(oxad);
2856 xlen = lengthXAD(oxad); 2846 xlen = lengthXAD(oxad);
2857 2847
2858 /* validate extent offset */ 2848 /* validate extent offset */
2859 offset = xoff << JFS_SBI(ip->i_sb)->l2bsize; 2849 offset = xoff << JFS_SBI(ip->i_sb)->l2bsize;
2860 if (offset >= ip->i_size) 2850 if (offset >= ip->i_size)
2861 return -ESTALE; /* stale extent */ 2851 return -ESTALE; /* stale extent */
2862 2852
2863 jfs_info("xtRelocate: xtype:%d xoff:0x%lx xlen:0x%x xaddr:0x%lx:0x%lx", 2853 jfs_info("xtRelocate: xtype:%d xoff:0x%lx xlen:0x%x xaddr:0x%lx:0x%lx",
2864 xtype, (ulong) xoff, xlen, (ulong) oxaddr, (ulong) nxaddr); 2854 xtype, (ulong) xoff, xlen, (ulong) oxaddr, (ulong) nxaddr);
2865 2855
2866 /* 2856 /*
2867 * 1. get and validate the parent xtpage/xad entry 2857 * 1. get and validate the parent xtpage/xad entry
2868 * covering the source extent to be relocated; 2858 * covering the source extent to be relocated;
2869 */ 2859 */
2870 if (xtype == DATAEXT) { 2860 if (xtype == DATAEXT) {
2871 /* search in leaf entry */ 2861 /* search in leaf entry */
2872 rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0); 2862 rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0);
2873 if (rc) 2863 if (rc)
2874 return rc; 2864 return rc;
2875 2865
2876 /* retrieve search result */ 2866 /* retrieve search result */
2877 XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); 2867 XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
2878 2868
2879 if (cmp) { 2869 if (cmp) {
2880 XT_PUTPAGE(pmp); 2870 XT_PUTPAGE(pmp);
2881 return -ESTALE; 2871 return -ESTALE;
2882 } 2872 }
2883 2873
2884 /* validate for exact match with a single entry */ 2874 /* validate for exact match with a single entry */
2885 xad = &pp->xad[index]; 2875 xad = &pp->xad[index];
2886 if (addressXAD(xad) != oxaddr || lengthXAD(xad) != xlen) { 2876 if (addressXAD(xad) != oxaddr || lengthXAD(xad) != xlen) {
2887 XT_PUTPAGE(pmp); 2877 XT_PUTPAGE(pmp);
2888 return -ESTALE; 2878 return -ESTALE;
2889 } 2879 }
2890 } else { /* (xtype == XTPAGE) */ 2880 } else { /* (xtype == XTPAGE) */
2891 2881
2892 /* search in internal entry */ 2882 /* search in internal entry */
2893 rc = xtSearchNode(ip, oxad, &cmp, &btstack, 0); 2883 rc = xtSearchNode(ip, oxad, &cmp, &btstack, 0);
2894 if (rc) 2884 if (rc)
2895 return rc; 2885 return rc;
2896 2886
2897 /* retrieve search result */ 2887 /* retrieve search result */
2898 XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); 2888 XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
2899 2889
2900 if (cmp) { 2890 if (cmp) {
2901 XT_PUTPAGE(pmp); 2891 XT_PUTPAGE(pmp);
2902 return -ESTALE; 2892 return -ESTALE;
2903 } 2893 }
2904 2894
2905 /* xtSearchNode() validated for exact match with a single entry 2895 /* xtSearchNode() validated for exact match with a single entry
2906 */ 2896 */
2907 xad = &pp->xad[index]; 2897 xad = &pp->xad[index];
2908 } 2898 }
2909 jfs_info("xtRelocate: parent xad entry validated."); 2899 jfs_info("xtRelocate: parent xad entry validated.");
2910 2900
2911 /* 2901 /*
2912 * 2. relocate the extent 2902 * 2. relocate the extent
2913 */ 2903 */
2914 if (xtype == DATAEXT) { 2904 if (xtype == DATAEXT) {
2915 /* if the extent is allocated-but-not-recorded 2905 /* if the extent is allocated-but-not-recorded
2916 * there is no real data to be moved in this extent, 2906 * there is no real data to be moved in this extent,
2917 */ 2907 */
2918 if (xad->flag & XAD_NOTRECORDED) 2908 if (xad->flag & XAD_NOTRECORDED)
2919 goto out; 2909 goto out;
2920 else 2910 else
2921 /* release xtpage for cmRead()/xtLookup() */ 2911 /* release xtpage for cmRead()/xtLookup() */
2922 XT_PUTPAGE(pmp); 2912 XT_PUTPAGE(pmp);
2923 2913
2924 /* 2914 /*
2925 * cmRelocate() 2915 * cmRelocate()
2926 * 2916 *
2927 * copy target data pages to be relocated; 2917 * copy target data pages to be relocated;
2928 * 2918 *
2929 * data extent must start at page boundary and 2919 * data extent must start at page boundary and
2930 * multiple of page size (except the last data extent); 2920 * multiple of page size (except the last data extent);
2931 * read in each page of the source data extent into cbuf, 2921 * read in each page of the source data extent into cbuf,
2932 * update the cbuf extent descriptor of the page to be 2922 * update the cbuf extent descriptor of the page to be
2933 * homeward bound to new dst data extent 2923 * homeward bound to new dst data extent
2934 * copy the data from the old extent to new extent. 2924 * copy the data from the old extent to new extent.
2935 * copy is essential for compressed files to avoid problems 2925 * copy is essential for compressed files to avoid problems
2936 * that can arise if there was a change in compression 2926 * that can arise if there was a change in compression
2937 * algorithms. 2927 * algorithms.
2938 * it is a good strategy because it may disrupt cache 2928 * it is a good strategy because it may disrupt cache
2939 * policy to keep the pages in memory afterwards. 2929 * policy to keep the pages in memory afterwards.
2940 */ 2930 */
2941 offset = xoff << JFS_SBI(ip->i_sb)->l2bsize; 2931 offset = xoff << JFS_SBI(ip->i_sb)->l2bsize;
2942 assert((offset & CM_OFFSET) == 0); 2932 assert((offset & CM_OFFSET) == 0);
2943 nbytes = xlen << JFS_SBI(ip->i_sb)->l2bsize; 2933 nbytes = xlen << JFS_SBI(ip->i_sb)->l2bsize;
2944 pno = offset >> CM_L2BSIZE; 2934 pno = offset >> CM_L2BSIZE;
2945 npages = (nbytes + (CM_BSIZE - 1)) >> CM_L2BSIZE; 2935 npages = (nbytes + (CM_BSIZE - 1)) >> CM_L2BSIZE;
2946 /* 2936 /*
2947 npages = ((offset + nbytes - 1) >> CM_L2BSIZE) - 2937 npages = ((offset + nbytes - 1) >> CM_L2BSIZE) -
2948 (offset >> CM_L2BSIZE) + 1; 2938 (offset >> CM_L2BSIZE) + 1;
2949 */ 2939 */
2950 sxaddr = oxaddr; 2940 sxaddr = oxaddr;
2951 dxaddr = nxaddr; 2941 dxaddr = nxaddr;
2952 2942
2953 /* process the request one cache buffer at a time */ 2943 /* process the request one cache buffer at a time */
2954 for (nbrd = 0; nbrd < nbytes; nbrd += nb, 2944 for (nbrd = 0; nbrd < nbytes; nbrd += nb,
2955 offset += nb, pno++, npages--) { 2945 offset += nb, pno++, npages--) {
2956 /* compute page size */ 2946 /* compute page size */
2957 nb = min(nbytes - nbrd, CM_BSIZE); 2947 nb = min(nbytes - nbrd, CM_BSIZE);
2958 2948
2959 /* get the cache buffer of the page */ 2949 /* get the cache buffer of the page */
2960 if (rc = cmRead(ip, offset, npages, &cp)) 2950 if (rc = cmRead(ip, offset, npages, &cp))
2961 break; 2951 break;
2962 2952
2963 assert(addressPXD(&cp->cm_pxd) == sxaddr); 2953 assert(addressPXD(&cp->cm_pxd) == sxaddr);
2964 assert(!cp->cm_modified); 2954 assert(!cp->cm_modified);
2965 2955
2966 /* bind buffer with the new extent address */ 2956 /* bind buffer with the new extent address */
2967 nblks = nb >> JFS_IP(ip->i_sb)->l2bsize; 2957 nblks = nb >> JFS_IP(ip->i_sb)->l2bsize;
2968 cmSetXD(ip, cp, pno, dxaddr, nblks); 2958 cmSetXD(ip, cp, pno, dxaddr, nblks);
2969 2959
2970 /* release the cbuf, mark it as modified */ 2960 /* release the cbuf, mark it as modified */
2971 cmPut(cp, true); 2961 cmPut(cp, true);
2972 2962
2973 dxaddr += nblks; 2963 dxaddr += nblks;
2974 sxaddr += nblks; 2964 sxaddr += nblks;
2975 } 2965 }
2976 2966
2977 /* get back parent page */ 2967 /* get back parent page */
2978 if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0))) 2968 if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0)))
2979 return rc; 2969 return rc;
2980 2970
2981 XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); 2971 XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
2982 jfs_info("xtRelocate: target data extent relocated."); 2972 jfs_info("xtRelocate: target data extent relocated.");
2983 } else { /* (xtype == XTPAGE) */ 2973 } else { /* (xtype == XTPAGE) */
2984 2974
2985 /* 2975 /*
2986 * read in the target xtpage from the source extent; 2976 * read in the target xtpage from the source extent;
2987 */ 2977 */
2988 XT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc); 2978 XT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc);
2989 if (rc) { 2979 if (rc) {
2990 XT_PUTPAGE(pmp); 2980 XT_PUTPAGE(pmp);
2991 return rc; 2981 return rc;
2992 } 2982 }
2993 2983
2994 /* 2984 /*
2995 * read in sibling pages if any to update sibling pointers; 2985 * read in sibling pages if any to update sibling pointers;
2996 */ 2986 */
2997 rmp = NULL; 2987 rmp = NULL;
2998 if (p->header.next) { 2988 if (p->header.next) {
2999 nextbn = le64_to_cpu(p->header.next); 2989 nextbn = le64_to_cpu(p->header.next);
3000 XT_GETPAGE(ip, nextbn, rmp, PSIZE, rp, rc); 2990 XT_GETPAGE(ip, nextbn, rmp, PSIZE, rp, rc);
3001 if (rc) { 2991 if (rc) {
3002 XT_PUTPAGE(pmp); 2992 XT_PUTPAGE(pmp);
3003 XT_PUTPAGE(mp); 2993 XT_PUTPAGE(mp);
3004 return (rc); 2994 return (rc);
3005 } 2995 }
3006 } 2996 }
3007 2997
3008 lmp = NULL; 2998 lmp = NULL;
3009 if (p->header.prev) { 2999 if (p->header.prev) {
3010 prevbn = le64_to_cpu(p->header.prev); 3000 prevbn = le64_to_cpu(p->header.prev);
3011 XT_GETPAGE(ip, prevbn, lmp, PSIZE, lp, rc); 3001 XT_GETPAGE(ip, prevbn, lmp, PSIZE, lp, rc);
3012 if (rc) { 3002 if (rc) {
3013 XT_PUTPAGE(pmp); 3003 XT_PUTPAGE(pmp);
3014 XT_PUTPAGE(mp); 3004 XT_PUTPAGE(mp);
3015 if (rmp) 3005 if (rmp)
3016 XT_PUTPAGE(rmp); 3006 XT_PUTPAGE(rmp);
3017 return (rc); 3007 return (rc);
3018 } 3008 }
3019 } 3009 }
3020 3010
3021 /* at this point, all xtpages to be updated are in memory */ 3011 /* at this point, all xtpages to be updated are in memory */
3022 3012
3023 /* 3013 /*
3024 * update sibling pointers of sibling xtpages if any; 3014 * update sibling pointers of sibling xtpages if any;
3025 */ 3015 */
3026 if (lmp) { 3016 if (lmp) {
3027 BT_MARK_DIRTY(lmp, ip); 3017 BT_MARK_DIRTY(lmp, ip);
3028 tlck = txLock(tid, ip, lmp, tlckXTREE | tlckRELINK); 3018 tlck = txLock(tid, ip, lmp, tlckXTREE | tlckRELINK);
3029 lp->header.next = cpu_to_le64(nxaddr); 3019 lp->header.next = cpu_to_le64(nxaddr);
3030 XT_PUTPAGE(lmp); 3020 XT_PUTPAGE(lmp);
3031 } 3021 }
3032 3022
3033 if (rmp) { 3023 if (rmp) {
3034 BT_MARK_DIRTY(rmp, ip); 3024 BT_MARK_DIRTY(rmp, ip);
3035 tlck = txLock(tid, ip, rmp, tlckXTREE | tlckRELINK); 3025 tlck = txLock(tid, ip, rmp, tlckXTREE | tlckRELINK);
3036 rp->header.prev = cpu_to_le64(nxaddr); 3026 rp->header.prev = cpu_to_le64(nxaddr);
3037 XT_PUTPAGE(rmp); 3027 XT_PUTPAGE(rmp);
3038 } 3028 }
3039 3029
3040 /* 3030 /*
3041 * update the target xtpage to be relocated 3031 * update the target xtpage to be relocated
3042 * 3032 *
3043 * update the self address of the target page 3033 * update the self address of the target page
3044 * and write to destination extent; 3034 * and write to destination extent;
3045 * redo image covers the whole xtpage since it is new page 3035 * redo image covers the whole xtpage since it is new page
3046 * to the destination extent; 3036 * to the destination extent;
3047 * update of bmap for the free of source extent 3037 * update of bmap for the free of source extent
3048 * of the target xtpage itself: 3038 * of the target xtpage itself:
3049 * update of bmap for the allocation of destination extent 3039 * update of bmap for the allocation of destination extent
3050 * of the target xtpage itself: 3040 * of the target xtpage itself:
3051 * update of bmap for the extents covered by xad entries in 3041 * update of bmap for the extents covered by xad entries in
3052 * the target xtpage is not necessary since they are not 3042 * the target xtpage is not necessary since they are not
3053 * updated; 3043 * updated;
3054 * if not committed before this relocation, 3044 * if not committed before this relocation,
3055 * target page may contain XAD_NEW entries which must 3045 * target page may contain XAD_NEW entries which must
3056 * be scanned for bmap update (logredo() always 3046 * be scanned for bmap update (logredo() always
3057 * scan xtpage REDOPAGE image for bmap update); 3047 * scan xtpage REDOPAGE image for bmap update);
3058 * if committed before this relocation (tlckRELOCATE), 3048 * if committed before this relocation (tlckRELOCATE),
3059 * scan may be skipped by commit() and logredo(); 3049 * scan may be skipped by commit() and logredo();
3060 */ 3050 */
3061 BT_MARK_DIRTY(mp, ip); 3051 BT_MARK_DIRTY(mp, ip);
3062 /* tlckNEW init xtlck->lwm.offset = XTENTRYSTART; */ 3052 /* tlckNEW init xtlck->lwm.offset = XTENTRYSTART; */
3063 tlck = txLock(tid, ip, mp, tlckXTREE | tlckNEW); 3053 tlck = txLock(tid, ip, mp, tlckXTREE | tlckNEW);
3064 xtlck = (struct xtlock *) & tlck->lock; 3054 xtlck = (struct xtlock *) & tlck->lock;
3065 3055
3066 /* update the self address in the xtpage header */ 3056 /* update the self address in the xtpage header */
3067 pxd = &p->header.self; 3057 pxd = &p->header.self;
3068 PXDaddress(pxd, nxaddr); 3058 PXDaddress(pxd, nxaddr);
3069 3059
3070 /* linelock for the after image of the whole page */ 3060 /* linelock for the after image of the whole page */
3071 xtlck->lwm.length = 3061 xtlck->lwm.length =
3072 le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset; 3062 le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
3073 3063
3074 /* update the buffer extent descriptor of target xtpage */ 3064 /* update the buffer extent descriptor of target xtpage */
3075 xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize; 3065 xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize;
3076 bmSetXD(mp, nxaddr, xsize); 3066 bmSetXD(mp, nxaddr, xsize);
3077 3067
3078 /* unpin the target page to new homeward bound */ 3068 /* unpin the target page to new homeward bound */
3079 XT_PUTPAGE(mp); 3069 XT_PUTPAGE(mp);
3080 jfs_info("xtRelocate: target xtpage relocated."); 3070 jfs_info("xtRelocate: target xtpage relocated.");
3081 } 3071 }
3082 3072
3083 /* 3073 /*
3084 * 3. acquire maplock for the source extent to be freed; 3074 * 3. acquire maplock for the source extent to be freed;
3085 * 3075 *
3086 * acquire a maplock saving the src relocated extent address; 3076 * acquire a maplock saving the src relocated extent address;
3087 * to free of the extent at commit time; 3077 * to free of the extent at commit time;
3088 */ 3078 */
3089 out: 3079 out:
3090 /* if DATAEXT relocation, write a LOG_UPDATEMAP record for 3080 /* if DATAEXT relocation, write a LOG_UPDATEMAP record for
3091 * free PXD of the source data extent (logredo() will update 3081 * free PXD of the source data extent (logredo() will update
3092 * bmap for free of source data extent), and update bmap for 3082 * bmap for free of source data extent), and update bmap for
3093 * free of the source data extent; 3083 * free of the source data extent;
3094 */ 3084 */
3095 if (xtype == DATAEXT) 3085 if (xtype == DATAEXT)
3096 tlck = txMaplock(tid, ip, tlckMAP); 3086 tlck = txMaplock(tid, ip, tlckMAP);
3097 /* if XTPAGE relocation, write a LOG_NOREDOPAGE record 3087 /* if XTPAGE relocation, write a LOG_NOREDOPAGE record
3098 * for the source xtpage (logredo() will init NoRedoPage 3088 * for the source xtpage (logredo() will init NoRedoPage
3099 * filter and will also update bmap for free of the source 3089 * filter and will also update bmap for free of the source
3100 * xtpage), and update bmap for free of the source xtpage; 3090 * xtpage), and update bmap for free of the source xtpage;
3101 * N.B. We use tlckMAP instead of tlkcXTREE because there 3091 * N.B. We use tlckMAP instead of tlkcXTREE because there
3102 * is no buffer associated with this lock since the buffer 3092 * is no buffer associated with this lock since the buffer
3103 * has been redirected to the target location. 3093 * has been redirected to the target location.
3104 */ 3094 */
3105 else /* (xtype == XTPAGE) */ 3095 else /* (xtype == XTPAGE) */
3106 tlck = txMaplock(tid, ip, tlckMAP | tlckRELOCATE); 3096 tlck = txMaplock(tid, ip, tlckMAP | tlckRELOCATE);
3107 3097
3108 pxdlock = (struct pxd_lock *) & tlck->lock; 3098 pxdlock = (struct pxd_lock *) & tlck->lock;
3109 pxdlock->flag = mlckFREEPXD; 3099 pxdlock->flag = mlckFREEPXD;
3110 PXDaddress(&pxdlock->pxd, oxaddr); 3100 PXDaddress(&pxdlock->pxd, oxaddr);
3111 PXDlength(&pxdlock->pxd, xlen); 3101 PXDlength(&pxdlock->pxd, xlen);
3112 pxdlock->index = 1; 3102 pxdlock->index = 1;
3113 3103
3114 /* 3104 /*
3115 * 4. update the parent xad entry for relocation; 3105 * 4. update the parent xad entry for relocation;
3116 * 3106 *
3117 * acquire tlck for the parent entry with XAD_NEW as entry 3107 * acquire tlck for the parent entry with XAD_NEW as entry
3118 * update which will write LOG_REDOPAGE and update bmap for 3108 * update which will write LOG_REDOPAGE and update bmap for
3119 * allocation of XAD_NEW destination extent; 3109 * allocation of XAD_NEW destination extent;
3120 */ 3110 */
3121 jfs_info("xtRelocate: update parent xad entry."); 3111 jfs_info("xtRelocate: update parent xad entry.");
3122 BT_MARK_DIRTY(pmp, ip); 3112 BT_MARK_DIRTY(pmp, ip);
3123 tlck = txLock(tid, ip, pmp, tlckXTREE | tlckGROW); 3113 tlck = txLock(tid, ip, pmp, tlckXTREE | tlckGROW);
3124 xtlck = (struct xtlock *) & tlck->lock; 3114 xtlck = (struct xtlock *) & tlck->lock;
3125 3115
3126 /* update the XAD with the new destination extent; */ 3116 /* update the XAD with the new destination extent; */
3127 xad = &pp->xad[index]; 3117 xad = &pp->xad[index];
3128 xad->flag |= XAD_NEW; 3118 xad->flag |= XAD_NEW;
3129 XADaddress(xad, nxaddr); 3119 XADaddress(xad, nxaddr);
3130 3120
3131 xtlck->lwm.offset = min(index, xtlck->lwm.offset); 3121 xtlck->lwm.offset = min(index, xtlck->lwm.offset);
3132 xtlck->lwm.length = le16_to_cpu(pp->header.nextindex) - 3122 xtlck->lwm.length = le16_to_cpu(pp->header.nextindex) -
3133 xtlck->lwm.offset; 3123 xtlck->lwm.offset;
3134 3124
3135 /* unpin the parent xtpage */ 3125 /* unpin the parent xtpage */
3136 XT_PUTPAGE(pmp); 3126 XT_PUTPAGE(pmp);
3137 3127
3138 return rc; 3128 return rc;
3139 } 3129 }
3140 3130
3141 3131
3142 /* 3132 /*
3143 * xtSearchNode() 3133 * xtSearchNode()
3144 * 3134 *
3145 * function: search for the internal xad entry covering specified extent. 3135 * function: search for the internal xad entry covering specified extent.
3146 * This function is mainly used by defragfs utility. 3136 * This function is mainly used by defragfs utility.
3147 * 3137 *
3148 * parameters: 3138 * parameters:
3149 * ip - file object; 3139 * ip - file object;
3150 * xad - extent to find; 3140 * xad - extent to find;
3151 * cmpp - comparison result: 3141 * cmpp - comparison result:
3152 * btstack - traverse stack; 3142 * btstack - traverse stack;
3153 * flag - search process flag; 3143 * flag - search process flag;
3154 * 3144 *
3155 * returns: 3145 * returns:
3156 * btstack contains (bn, index) of search path traversed to the entry. 3146 * btstack contains (bn, index) of search path traversed to the entry.
3157 * *cmpp is set to result of comparison with the entry returned. 3147 * *cmpp is set to result of comparison with the entry returned.
3158 * the page containing the entry is pinned at exit. 3148 * the page containing the entry is pinned at exit.
3159 */ 3149 */
3160 static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */ 3150 static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */
3161 int *cmpp, struct btstack * btstack, int flag) 3151 int *cmpp, struct btstack * btstack, int flag)
3162 { 3152 {
3163 int rc = 0; 3153 int rc = 0;
3164 s64 xoff, xaddr; 3154 s64 xoff, xaddr;
3165 int xlen; 3155 int xlen;
3166 int cmp = 1; /* init for empty page */ 3156 int cmp = 1; /* init for empty page */
3167 s64 bn; /* block number */ 3157 s64 bn; /* block number */
3168 struct metapage *mp; /* meta-page buffer */ 3158 struct metapage *mp; /* meta-page buffer */
3169 xtpage_t *p; /* page */ 3159 xtpage_t *p; /* page */
3170 int base, index, lim; 3160 int base, index, lim;
3171 struct btframe *btsp; 3161 struct btframe *btsp;
3172 s64 t64; 3162 s64 t64;
3173 3163
3174 BT_CLR(btstack); 3164 BT_CLR(btstack);
3175 3165
3176 xoff = offsetXAD(xad); 3166 xoff = offsetXAD(xad);
3177 xlen = lengthXAD(xad); 3167 xlen = lengthXAD(xad);
3178 xaddr = addressXAD(xad); 3168 xaddr = addressXAD(xad);
3179 3169
3180 /* 3170 /*
3181 * search down tree from root: 3171 * search down tree from root:
3182 * 3172 *
3183 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of 3173 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
3184 * internal page, child page Pi contains entry with k, Ki <= K < Kj. 3174 * internal page, child page Pi contains entry with k, Ki <= K < Kj.
3185 * 3175 *
3186 * if entry with search key K is not found 3176 * if entry with search key K is not found
3187 * internal page search find the entry with largest key Ki 3177 * internal page search find the entry with largest key Ki
3188 * less than K which point to the child page to search; 3178 * less than K which point to the child page to search;
3189 * leaf page search find the entry with smallest key Kj 3179 * leaf page search find the entry with smallest key Kj
3190 * greater than K so that the returned index is the position of 3180 * greater than K so that the returned index is the position of
3191 * the entry to be shifted right for insertion of new entry. 3181 * the entry to be shifted right for insertion of new entry.
3192 * for empty tree, search key is greater than any key of the tree. 3182 * for empty tree, search key is greater than any key of the tree.
3193 * 3183 *
3194 * by convention, root bn = 0. 3184 * by convention, root bn = 0.
3195 */ 3185 */
3196 for (bn = 0;;) { 3186 for (bn = 0;;) {
3197 /* get/pin the page to search */ 3187 /* get/pin the page to search */
3198 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3188 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3199 if (rc) 3189 if (rc)
3200 return rc; 3190 return rc;
3201 if (p->header.flag & BT_LEAF) { 3191 if (p->header.flag & BT_LEAF) {
3202 XT_PUTPAGE(mp); 3192 XT_PUTPAGE(mp);
3203 return -ESTALE; 3193 return -ESTALE;
3204 } 3194 }
3205 3195
3206 lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART; 3196 lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART;
3207 3197
3208 /* 3198 /*
3209 * binary search with search key K on the current page 3199 * binary search with search key K on the current page
3210 */ 3200 */
3211 for (base = XTENTRYSTART; lim; lim >>= 1) { 3201 for (base = XTENTRYSTART; lim; lim >>= 1) {
3212 index = base + (lim >> 1); 3202 index = base + (lim >> 1);
3213 3203
3214 XT_CMP(cmp, xoff, &p->xad[index], t64); 3204 XT_CMP(cmp, xoff, &p->xad[index], t64);
3215 if (cmp == 0) { 3205 if (cmp == 0) {
3216 /* 3206 /*
3217 * search hit 3207 * search hit
3218 * 3208 *
3219 * verify for exact match; 3209 * verify for exact match;
3220 */ 3210 */
3221 if (xaddr == addressXAD(&p->xad[index]) && 3211 if (xaddr == addressXAD(&p->xad[index]) &&
3222 xoff == offsetXAD(&p->xad[index])) { 3212 xoff == offsetXAD(&p->xad[index])) {
3223 *cmpp = cmp; 3213 *cmpp = cmp;
3224 3214
3225 /* save search result */ 3215 /* save search result */
3226 btsp = btstack->top; 3216 btsp = btstack->top;
3227 btsp->bn = bn; 3217 btsp->bn = bn;
3228 btsp->index = index; 3218 btsp->index = index;
3229 btsp->mp = mp; 3219 btsp->mp = mp;
3230 3220
3231 return 0; 3221 return 0;
3232 } 3222 }
3233 3223
3234 /* descend/search its child page */ 3224 /* descend/search its child page */
3235 goto next; 3225 goto next;
3236 } 3226 }
3237 3227
3238 if (cmp > 0) { 3228 if (cmp > 0) {
3239 base = index + 1; 3229 base = index + 1;
3240 --lim; 3230 --lim;
3241 } 3231 }
3242 } 3232 }
3243 3233
3244 /* 3234 /*
3245 * search miss - non-leaf page: 3235 * search miss - non-leaf page:
3246 * 3236 *
3247 * base is the smallest index with key (Kj) greater than 3237 * base is the smallest index with key (Kj) greater than
3248 * search key (K) and may be zero or maxentry index. 3238 * search key (K) and may be zero or maxentry index.
3249 * if base is non-zero, decrement base by one to get the parent 3239 * if base is non-zero, decrement base by one to get the parent
3250 * entry of the child page to search. 3240 * entry of the child page to search.
3251 */ 3241 */
3252 index = base ? base - 1 : base; 3242 index = base ? base - 1 : base;
3253 3243
3254 /* 3244 /*
3255 * go down to child page 3245 * go down to child page
3256 */ 3246 */
3257 next: 3247 next:
3258 /* get the child page block number */ 3248 /* get the child page block number */
3259 bn = addressXAD(&p->xad[index]); 3249 bn = addressXAD(&p->xad[index]);
3260 3250
3261 /* unpin the parent page */ 3251 /* unpin the parent page */
3262 XT_PUTPAGE(mp); 3252 XT_PUTPAGE(mp);
3263 } 3253 }
3264 } 3254 }
3265 3255
3266 3256
3267 /* 3257 /*
3268 * xtRelink() 3258 * xtRelink()
3269 * 3259 *
3270 * function: 3260 * function:
3271 * link around a freed page. 3261 * link around a freed page.
3272 * 3262 *
3273 * Parameter: 3263 * Parameter:
3274 * int tid, 3264 * int tid,
3275 * struct inode *ip, 3265 * struct inode *ip,
3276 * xtpage_t *p) 3266 * xtpage_t *p)
3277 * 3267 *
3278 * returns: 3268 * returns:
3279 */ 3269 */
3280 static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * p) 3270 static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * p)
3281 { 3271 {
3282 int rc = 0; 3272 int rc = 0;
3283 struct metapage *mp; 3273 struct metapage *mp;
3284 s64 nextbn, prevbn; 3274 s64 nextbn, prevbn;
3285 struct tlock *tlck; 3275 struct tlock *tlck;
3286 3276
3287 nextbn = le64_to_cpu(p->header.next); 3277 nextbn = le64_to_cpu(p->header.next);
3288 prevbn = le64_to_cpu(p->header.prev); 3278 prevbn = le64_to_cpu(p->header.prev);
3289 3279
3290 /* update prev pointer of the next page */ 3280 /* update prev pointer of the next page */
3291 if (nextbn != 0) { 3281 if (nextbn != 0) {
3292 XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); 3282 XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
3293 if (rc) 3283 if (rc)
3294 return rc; 3284 return rc;
3295 3285
3296 /* 3286 /*
3297 * acquire a transaction lock on the page; 3287 * acquire a transaction lock on the page;
3298 * 3288 *
3299 * action: update prev pointer; 3289 * action: update prev pointer;
3300 */ 3290 */
3301 BT_MARK_DIRTY(mp, ip); 3291 BT_MARK_DIRTY(mp, ip);
3302 tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK); 3292 tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);
3303 3293
3304 /* the page may already have been tlock'd */ 3294 /* the page may already have been tlock'd */
3305 3295
3306 p->header.prev = cpu_to_le64(prevbn); 3296 p->header.prev = cpu_to_le64(prevbn);
3307 3297
3308 XT_PUTPAGE(mp); 3298 XT_PUTPAGE(mp);
3309 } 3299 }
3310 3300
3311 /* update next pointer of the previous page */ 3301 /* update next pointer of the previous page */
3312 if (prevbn != 0) { 3302 if (prevbn != 0) {
3313 XT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc); 3303 XT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc);
3314 if (rc) 3304 if (rc)
3315 return rc; 3305 return rc;
3316 3306
3317 /* 3307 /*
3318 * acquire a transaction lock on the page; 3308 * acquire a transaction lock on the page;
3319 * 3309 *
3320 * action: update next pointer; 3310 * action: update next pointer;
3321 */ 3311 */
3322 BT_MARK_DIRTY(mp, ip); 3312 BT_MARK_DIRTY(mp, ip);
3323 tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK); 3313 tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);
3324 3314
3325 /* the page may already have been tlock'd */ 3315 /* the page may already have been tlock'd */
3326 3316
3327 p->header.next = le64_to_cpu(nextbn); 3317 p->header.next = le64_to_cpu(nextbn);
3328 3318
3329 XT_PUTPAGE(mp); 3319 XT_PUTPAGE(mp);
3330 } 3320 }
3331 3321
3332 return 0; 3322 return 0;
3333 } 3323 }
3334 #endif /* _STILL_TO_PORT */ 3324 #endif /* _STILL_TO_PORT */
3335 3325
3336 3326
3337 /* 3327 /*
3338 * xtInitRoot() 3328 * xtInitRoot()
3339 * 3329 *
3340 * initialize file root (inline in inode) 3330 * initialize file root (inline in inode)
3341 */ 3331 */
3342 void xtInitRoot(tid_t tid, struct inode *ip) 3332 void xtInitRoot(tid_t tid, struct inode *ip)
3343 { 3333 {
3344 xtpage_t *p; 3334 xtpage_t *p;
3345 3335
3346 /* 3336 /*
3347 * acquire a transaction lock on the root 3337 * acquire a transaction lock on the root
3348 * 3338 *
3349 * action: 3339 * action:
3350 */ 3340 */
3351 txLock(tid, ip, (struct metapage *) &JFS_IP(ip)->bxflag, 3341 txLock(tid, ip, (struct metapage *) &JFS_IP(ip)->bxflag,
3352 tlckXTREE | tlckNEW); 3342 tlckXTREE | tlckNEW);
3353 p = &JFS_IP(ip)->i_xtroot; 3343 p = &JFS_IP(ip)->i_xtroot;
3354 3344
3355 p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF; 3345 p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF;
3356 p->header.nextindex = cpu_to_le16(XTENTRYSTART); 3346 p->header.nextindex = cpu_to_le16(XTENTRYSTART);
3357 3347
3358 if (S_ISDIR(ip->i_mode)) 3348 if (S_ISDIR(ip->i_mode))
3359 p->header.maxentry = cpu_to_le16(XTROOTINITSLOT_DIR); 3349 p->header.maxentry = cpu_to_le16(XTROOTINITSLOT_DIR);
3360 else { 3350 else {
3361 p->header.maxentry = cpu_to_le16(XTROOTINITSLOT); 3351 p->header.maxentry = cpu_to_le16(XTROOTINITSLOT);
3362 ip->i_size = 0; 3352 ip->i_size = 0;
3363 } 3353 }
3364 3354
3365 3355
3366 return; 3356 return;
3367 } 3357 }
3368 3358
3369 3359
3370 /* 3360 /*
3371 * We can run into a deadlock truncating a file with a large number of 3361 * We can run into a deadlock truncating a file with a large number of
3372 * xtree pages (large fragmented file). A robust fix would entail a 3362 * xtree pages (large fragmented file). A robust fix would entail a
3373 * reservation system where we would reserve a number of metadata pages 3363 * reservation system where we would reserve a number of metadata pages
3374 * and tlocks which we would be guaranteed without a deadlock. Without 3364 * and tlocks which we would be guaranteed without a deadlock. Without
3375 * this, a partial fix is to limit number of metadata pages we will lock 3365 * this, a partial fix is to limit number of metadata pages we will lock
3376 * in a single transaction. Currently we will truncate the file so that 3366 * in a single transaction. Currently we will truncate the file so that
3377 * no more than 50 leaf pages will be locked. The caller of xtTruncate 3367 * no more than 50 leaf pages will be locked. The caller of xtTruncate
3378 * will be responsible for ensuring that the current transaction gets 3368 * will be responsible for ensuring that the current transaction gets
3379 * committed, and that subsequent transactions are created to truncate 3369 * committed, and that subsequent transactions are created to truncate
3380 * the file further if needed. 3370 * the file further if needed.
3381 */ 3371 */
3382 #define MAX_TRUNCATE_LEAVES 50 3372 #define MAX_TRUNCATE_LEAVES 50
3383 3373
3384 /* 3374 /*
3385 * xtTruncate() 3375 * xtTruncate()
3386 * 3376 *
3387 * function: 3377 * function:
3388 * traverse for truncation logging backward bottom up; 3378 * traverse for truncation logging backward bottom up;
3389 * terminate at the last extent entry at the current subtree 3379 * terminate at the last extent entry at the current subtree
3390 * root page covering new down size. 3380 * root page covering new down size.
3391 * truncation may occur within the last extent entry. 3381 * truncation may occur within the last extent entry.
3392 * 3382 *
3393 * parameter: 3383 * parameter:
3394 * int tid, 3384 * int tid,
3395 * struct inode *ip, 3385 * struct inode *ip,
3396 * s64 newsize, 3386 * s64 newsize,
3397 * int type) {PWMAP, PMAP, WMAP; DELETE, TRUNCATE} 3387 * int type) {PWMAP, PMAP, WMAP; DELETE, TRUNCATE}
3398 * 3388 *
3399 * return: 3389 * return:
3400 * 3390 *
3401 * note: 3391 * note:
3402 * PWMAP: 3392 * PWMAP:
3403 * 1. truncate (non-COMMIT_NOLINK file) 3393 * 1. truncate (non-COMMIT_NOLINK file)
3404 * by jfs_truncate() or jfs_open(O_TRUNC): 3394 * by jfs_truncate() or jfs_open(O_TRUNC):
3405 * xtree is updated; 3395 * xtree is updated;
3406 * 2. truncate index table of directory when last entry removed 3396 * 2. truncate index table of directory when last entry removed
3407 * map update via tlock at commit time; 3397 * map update via tlock at commit time;
3408 * PMAP: 3398 * PMAP:
3409 * Call xtTruncate_pmap instead 3399 * Call xtTruncate_pmap instead
3410 * WMAP: 3400 * WMAP:
3411 * 1. remove (free zero link count) on last reference release 3401 * 1. remove (free zero link count) on last reference release
3412 * (pmap has been freed at commit zero link count); 3402 * (pmap has been freed at commit zero link count);
3413 * 2. truncate (COMMIT_NOLINK file, i.e., tmp file): 3403 * 2. truncate (COMMIT_NOLINK file, i.e., tmp file):
3414 * xtree is updated; 3404 * xtree is updated;
3415 * map update directly at truncation time; 3405 * map update directly at truncation time;
3416 * 3406 *
3417 * if (DELETE) 3407 * if (DELETE)
3418 * no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient); 3408 * no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient);
3419 * else if (TRUNCATE) 3409 * else if (TRUNCATE)
3420 * must write LOG_NOREDOPAGE for deleted index page; 3410 * must write LOG_NOREDOPAGE for deleted index page;
3421 * 3411 *
3422 * pages may already have been tlocked by anonymous transactions 3412 * pages may already have been tlocked by anonymous transactions
3423 * during file growth (i.e., write) before truncation; 3413 * during file growth (i.e., write) before truncation;
3424 * 3414 *
3425 * except last truncated entry, deleted entries remains as is 3415 * except last truncated entry, deleted entries remains as is
3426 * in the page (nextindex is updated) for other use 3416 * in the page (nextindex is updated) for other use
3427 * (e.g., log/update allocation map): this avoid copying the page 3417 * (e.g., log/update allocation map): this avoid copying the page
3428 * info but delay free of pages; 3418 * info but delay free of pages;
3429 * 3419 *
3430 */ 3420 */
3431 s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag) 3421 s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
3432 { 3422 {
3433 int rc = 0; 3423 int rc = 0;
3434 s64 teof; 3424 s64 teof;
3435 struct metapage *mp; 3425 struct metapage *mp;
3436 xtpage_t *p; 3426 xtpage_t *p;
3437 s64 bn; 3427 s64 bn;
3438 int index, nextindex; 3428 int index, nextindex;
3439 xad_t *xad; 3429 xad_t *xad;
3440 s64 xoff, xaddr; 3430 s64 xoff, xaddr;
3441 int xlen, len, freexlen; 3431 int xlen, len, freexlen;
3442 struct btstack btstack; 3432 struct btstack btstack;
3443 struct btframe *parent; 3433 struct btframe *parent;
3444 struct tblock *tblk = NULL; 3434 struct tblock *tblk = NULL;
3445 struct tlock *tlck = NULL; 3435 struct tlock *tlck = NULL;
3446 struct xtlock *xtlck = NULL; 3436 struct xtlock *xtlck = NULL;
3447 struct xdlistlock xadlock; /* maplock for COMMIT_WMAP */ 3437 struct xdlistlock xadlock; /* maplock for COMMIT_WMAP */
3448 struct pxd_lock *pxdlock; /* maplock for COMMIT_WMAP */ 3438 struct pxd_lock *pxdlock; /* maplock for COMMIT_WMAP */
3449 s64 nfreed; 3439 s64 nfreed;
3450 int freed, log; 3440 int freed, log;
3451 int locked_leaves = 0; 3441 int locked_leaves = 0;
3452 3442
3453 /* save object truncation type */ 3443 /* save object truncation type */
3454 if (tid) { 3444 if (tid) {
3455 tblk = tid_to_tblock(tid); 3445 tblk = tid_to_tblock(tid);
3456 tblk->xflag |= flag; 3446 tblk->xflag |= flag;
3457 } 3447 }
3458 3448
3459 nfreed = 0; 3449 nfreed = 0;
3460 3450
3461 flag &= COMMIT_MAP; 3451 flag &= COMMIT_MAP;
3462 assert(flag != COMMIT_PMAP); 3452 assert(flag != COMMIT_PMAP);
3463 3453
3464 if (flag == COMMIT_PWMAP) 3454 if (flag == COMMIT_PWMAP)
3465 log = 1; 3455 log = 1;
3466 else { 3456 else {
3467 log = 0; 3457 log = 0;
3468 xadlock.flag = mlckFREEXADLIST; 3458 xadlock.flag = mlckFREEXADLIST;
3469 xadlock.index = 1; 3459 xadlock.index = 1;
3470 } 3460 }
3471 3461
3472 /* 3462 /*
3473 * if the newsize is not an integral number of pages, 3463 * if the newsize is not an integral number of pages,
3474 * the file between newsize and next page boundary will 3464 * the file between newsize and next page boundary will
3475 * be cleared. 3465 * be cleared.
3476 * if truncating into a file hole, it will cause 3466 * if truncating into a file hole, it will cause
3477 * a full block to be allocated for the logical block. 3467 * a full block to be allocated for the logical block.
3478 */ 3468 */
3479 3469
3480 /* 3470 /*
3481 * release page blocks of truncated region <teof, eof> 3471 * release page blocks of truncated region <teof, eof>
3482 * 3472 *
3483 * free the data blocks from the leaf index blocks. 3473 * free the data blocks from the leaf index blocks.
3484 * delete the parent index entries corresponding to 3474 * delete the parent index entries corresponding to
3485 * the freed child data/index blocks. 3475 * the freed child data/index blocks.
3486 * free the index blocks themselves which aren't needed 3476 * free the index blocks themselves which aren't needed
3487 * in new sized file. 3477 * in new sized file.
3488 * 3478 *
3489 * index blocks are updated only if the blocks are to be 3479 * index blocks are updated only if the blocks are to be
3490 * retained in the new sized file. 3480 * retained in the new sized file.
3491 * if type is PMAP, the data and index pages are NOT 3481 * if type is PMAP, the data and index pages are NOT
3492 * freed, and the data and index blocks are NOT freed 3482 * freed, and the data and index blocks are NOT freed
3493 * from working map. 3483 * from working map.
3494 * (this will allow continued access of data/index of 3484 * (this will allow continued access of data/index of
3495 * temporary file (zerolink count file truncated to zero-length)). 3485 * temporary file (zerolink count file truncated to zero-length)).
3496 */ 3486 */
3497 teof = (newsize + (JFS_SBI(ip->i_sb)->bsize - 1)) >> 3487 teof = (newsize + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
3498 JFS_SBI(ip->i_sb)->l2bsize; 3488 JFS_SBI(ip->i_sb)->l2bsize;
3499 3489
3500 /* clear stack */ 3490 /* clear stack */
3501 BT_CLR(&btstack); 3491 BT_CLR(&btstack);
3502 3492
3503 /* 3493 /*
3504 * start with root 3494 * start with root
3505 * 3495 *
3506 * root resides in the inode 3496 * root resides in the inode
3507 */ 3497 */
3508 bn = 0; 3498 bn = 0;
3509 3499
3510 /* 3500 /*
3511 * first access of each page: 3501 * first access of each page:
3512 */ 3502 */
3513 getPage: 3503 getPage:
3514 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3504 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3515 if (rc) 3505 if (rc)
3516 return rc; 3506 return rc;
3517 3507
3518 /* process entries backward from last index */ 3508 /* process entries backward from last index */
3519 index = le16_to_cpu(p->header.nextindex) - 1; 3509 index = le16_to_cpu(p->header.nextindex) - 1;
3520 3510
3521 3511
3522 /* Since this is the rightmost page at this level, and we may have 3512 /* Since this is the rightmost page at this level, and we may have
3523 * already freed a page that was formerly to the right, let's make 3513 * already freed a page that was formerly to the right, let's make
3524 * sure that the next pointer is zero. 3514 * sure that the next pointer is zero.
3525 */ 3515 */
3526 if (p->header.next) { 3516 if (p->header.next) {
3527 if (log) 3517 if (log)
3528 /* 3518 /*
3529 * Make sure this change to the header is logged. 3519 * Make sure this change to the header is logged.
3530 * If we really truncate this leaf, the flag 3520 * If we really truncate this leaf, the flag
3531 * will be changed to tlckTRUNCATE 3521 * will be changed to tlckTRUNCATE
3532 */ 3522 */
3533 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); 3523 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
3534 BT_MARK_DIRTY(mp, ip); 3524 BT_MARK_DIRTY(mp, ip);
3535 p->header.next = 0; 3525 p->header.next = 0;
3536 } 3526 }
3537 3527
3538 if (p->header.flag & BT_INTERNAL) 3528 if (p->header.flag & BT_INTERNAL)
3539 goto getChild; 3529 goto getChild;
3540 3530
3541 /* 3531 /*
3542 * leaf page 3532 * leaf page
3543 */ 3533 */
3544 freed = 0; 3534 freed = 0;
3545 3535
3546 /* does region covered by leaf page precede Teof ? */ 3536 /* does region covered by leaf page precede Teof ? */
3547 xad = &p->xad[index]; 3537 xad = &p->xad[index];
3548 xoff = offsetXAD(xad); 3538 xoff = offsetXAD(xad);
3549 xlen = lengthXAD(xad); 3539 xlen = lengthXAD(xad);
3550 if (teof >= xoff + xlen) { 3540 if (teof >= xoff + xlen) {
3551 XT_PUTPAGE(mp); 3541 XT_PUTPAGE(mp);
3552 goto getParent; 3542 goto getParent;
3553 } 3543 }
3554 3544
3555 /* (re)acquire tlock of the leaf page */ 3545 /* (re)acquire tlock of the leaf page */
3556 if (log) { 3546 if (log) {
3557 if (++locked_leaves > MAX_TRUNCATE_LEAVES) { 3547 if (++locked_leaves > MAX_TRUNCATE_LEAVES) {
3558 /* 3548 /*
3559 * We need to limit the size of the transaction 3549 * We need to limit the size of the transaction
3560 * to avoid exhausting pagecache & tlocks 3550 * to avoid exhausting pagecache & tlocks
3561 */ 3551 */
3562 XT_PUTPAGE(mp); 3552 XT_PUTPAGE(mp);
3563 newsize = (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize; 3553 newsize = (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
3564 goto getParent; 3554 goto getParent;
3565 } 3555 }
3566 tlck = txLock(tid, ip, mp, tlckXTREE); 3556 tlck = txLock(tid, ip, mp, tlckXTREE);
3567 tlck->type = tlckXTREE | tlckTRUNCATE; 3557 tlck->type = tlckXTREE | tlckTRUNCATE;
3568 xtlck = (struct xtlock *) & tlck->lock; 3558 xtlck = (struct xtlock *) & tlck->lock;
3569 xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1; 3559 xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1;
3570 } 3560 }
3571 BT_MARK_DIRTY(mp, ip); 3561 BT_MARK_DIRTY(mp, ip);
3572 3562
3573 /* 3563 /*
3574 * scan backward leaf page entries 3564 * scan backward leaf page entries
3575 */ 3565 */
3576 for (; index >= XTENTRYSTART; index--) { 3566 for (; index >= XTENTRYSTART; index--) {
3577 xad = &p->xad[index]; 3567 xad = &p->xad[index];
3578 xoff = offsetXAD(xad); 3568 xoff = offsetXAD(xad);
3579 xlen = lengthXAD(xad); 3569 xlen = lengthXAD(xad);
3580 xaddr = addressXAD(xad); 3570 xaddr = addressXAD(xad);
3581 3571
3582 /* 3572 /*
3583 * The "data" for a directory is indexed by the block 3573 * The "data" for a directory is indexed by the block
3584 * device's address space. This metadata must be invalidated 3574 * device's address space. This metadata must be invalidated
3585 * here 3575 * here
3586 */ 3576 */
3587 if (S_ISDIR(ip->i_mode) && (teof == 0)) 3577 if (S_ISDIR(ip->i_mode) && (teof == 0))
3588 invalidate_xad_metapages(ip, *xad); 3578 invalidate_xad_metapages(ip, *xad);
3589 /* 3579 /*
3590 * entry beyond eof: continue scan of current page 3580 * entry beyond eof: continue scan of current page
3591 * xad 3581 * xad
3592 * ---|---=======-------> 3582 * ---|---=======------->
3593 * eof 3583 * eof
3594 */ 3584 */
3595 if (teof < xoff) { 3585 if (teof < xoff) {
3596 nfreed += xlen; 3586 nfreed += xlen;
3597 continue; 3587 continue;
3598 } 3588 }
3599 3589
3600 /* 3590 /*
3601 * (xoff <= teof): last entry to be deleted from page; 3591 * (xoff <= teof): last entry to be deleted from page;
3602 * If other entries remain in page: keep and update the page. 3592 * If other entries remain in page: keep and update the page.
3603 */ 3593 */
3604 3594
3605 /* 3595 /*
3606 * eof == entry_start: delete the entry 3596 * eof == entry_start: delete the entry
3607 * xad 3597 * xad
3608 * -------|=======-------> 3598 * -------|=======------->
3609 * eof 3599 * eof
3610 * 3600 *
3611 */ 3601 */
3612 if (teof == xoff) { 3602 if (teof == xoff) {
3613 nfreed += xlen; 3603 nfreed += xlen;
3614 3604
3615 if (index == XTENTRYSTART) 3605 if (index == XTENTRYSTART)
3616 break; 3606 break;
3617 3607
3618 nextindex = index; 3608 nextindex = index;
3619 } 3609 }
3620 /* 3610 /*
3621 * eof within the entry: truncate the entry. 3611 * eof within the entry: truncate the entry.
3622 * xad 3612 * xad
3623 * -------===|===-------> 3613 * -------===|===------->
3624 * eof 3614 * eof
3625 */ 3615 */
3626 else if (teof < xoff + xlen) { 3616 else if (teof < xoff + xlen) {
3627 /* update truncated entry */ 3617 /* update truncated entry */
3628 len = teof - xoff; 3618 len = teof - xoff;
3629 freexlen = xlen - len; 3619 freexlen = xlen - len;
3630 XADlength(xad, len); 3620 XADlength(xad, len);
3631 3621
3632 /* save pxd of truncated extent in tlck */ 3622 /* save pxd of truncated extent in tlck */
3633 xaddr += len; 3623 xaddr += len;
3634 if (log) { /* COMMIT_PWMAP */ 3624 if (log) { /* COMMIT_PWMAP */
3635 xtlck->lwm.offset = (xtlck->lwm.offset) ? 3625 xtlck->lwm.offset = (xtlck->lwm.offset) ?
3636 min(index, (int)xtlck->lwm.offset) : index; 3626 min(index, (int)xtlck->lwm.offset) : index;
3637 xtlck->lwm.length = index + 1 - 3627 xtlck->lwm.length = index + 1 -
3638 xtlck->lwm.offset; 3628 xtlck->lwm.offset;
3639 xtlck->twm.offset = index; 3629 xtlck->twm.offset = index;
3640 pxdlock = (struct pxd_lock *) & xtlck->pxdlock; 3630 pxdlock = (struct pxd_lock *) & xtlck->pxdlock;
3641 pxdlock->flag = mlckFREEPXD; 3631 pxdlock->flag = mlckFREEPXD;
3642 PXDaddress(&pxdlock->pxd, xaddr); 3632 PXDaddress(&pxdlock->pxd, xaddr);
3643 PXDlength(&pxdlock->pxd, freexlen); 3633 PXDlength(&pxdlock->pxd, freexlen);
3644 } 3634 }
3645 /* free truncated extent */ 3635 /* free truncated extent */
3646 else { /* COMMIT_WMAP */ 3636 else { /* COMMIT_WMAP */
3647 3637
3648 pxdlock = (struct pxd_lock *) & xadlock; 3638 pxdlock = (struct pxd_lock *) & xadlock;
3649 pxdlock->flag = mlckFREEPXD; 3639 pxdlock->flag = mlckFREEPXD;
3650 PXDaddress(&pxdlock->pxd, xaddr); 3640 PXDaddress(&pxdlock->pxd, xaddr);
3651 PXDlength(&pxdlock->pxd, freexlen); 3641 PXDlength(&pxdlock->pxd, freexlen);
3652 txFreeMap(ip, pxdlock, NULL, COMMIT_WMAP); 3642 txFreeMap(ip, pxdlock, NULL, COMMIT_WMAP);
3653 3643
3654 /* reset map lock */ 3644 /* reset map lock */
3655 xadlock.flag = mlckFREEXADLIST; 3645 xadlock.flag = mlckFREEXADLIST;
3656 } 3646 }
3657 3647
3658 /* current entry is new last entry; */ 3648 /* current entry is new last entry; */
3659 nextindex = index + 1; 3649 nextindex = index + 1;
3660 3650
3661 nfreed += freexlen; 3651 nfreed += freexlen;
3662 } 3652 }
3663 /* 3653 /*
3664 * eof beyond the entry: 3654 * eof beyond the entry:
3665 * xad 3655 * xad
3666 * -------=======---|---> 3656 * -------=======---|--->
3667 * eof 3657 * eof
3668 */ 3658 */
3669 else { /* (xoff + xlen < teof) */ 3659 else { /* (xoff + xlen < teof) */
3670 3660
3671 nextindex = index + 1; 3661 nextindex = index + 1;
3672 } 3662 }
3673 3663
3674 if (nextindex < le16_to_cpu(p->header.nextindex)) { 3664 if (nextindex < le16_to_cpu(p->header.nextindex)) {
3675 if (!log) { /* COMMIT_WAMP */ 3665 if (!log) { /* COMMIT_WAMP */
3676 xadlock.xdlist = &p->xad[nextindex]; 3666 xadlock.xdlist = &p->xad[nextindex];
3677 xadlock.count = 3667 xadlock.count =
3678 le16_to_cpu(p->header.nextindex) - 3668 le16_to_cpu(p->header.nextindex) -
3679 nextindex; 3669 nextindex;
3680 txFreeMap(ip, (struct maplock *) & xadlock, 3670 txFreeMap(ip, (struct maplock *) & xadlock,
3681 NULL, COMMIT_WMAP); 3671 NULL, COMMIT_WMAP);
3682 } 3672 }
3683 p->header.nextindex = cpu_to_le16(nextindex); 3673 p->header.nextindex = cpu_to_le16(nextindex);
3684 } 3674 }
3685 3675
3686 XT_PUTPAGE(mp); 3676 XT_PUTPAGE(mp);
3687 3677
3688 /* assert(freed == 0); */ 3678 /* assert(freed == 0); */
3689 goto getParent; 3679 goto getParent;
3690 } /* end scan of leaf page entries */ 3680 } /* end scan of leaf page entries */
3691 3681
3692 freed = 1; 3682 freed = 1;
3693 3683
3694 /* 3684 /*
3695 * leaf page become empty: free the page if type != PMAP 3685 * leaf page become empty: free the page if type != PMAP
3696 */ 3686 */
3697 if (log) { /* COMMIT_PWMAP */ 3687 if (log) { /* COMMIT_PWMAP */
3698 /* txCommit() with tlckFREE: 3688 /* txCommit() with tlckFREE:
3699 * free data extents covered by leaf [XTENTRYSTART:hwm); 3689 * free data extents covered by leaf [XTENTRYSTART:hwm);
3700 * invalidate leaf if COMMIT_PWMAP; 3690 * invalidate leaf if COMMIT_PWMAP;
3701 * if (TRUNCATE), will write LOG_NOREDOPAGE; 3691 * if (TRUNCATE), will write LOG_NOREDOPAGE;
3702 */ 3692 */
3703 tlck->type = tlckXTREE | tlckFREE; 3693 tlck->type = tlckXTREE | tlckFREE;
3704 } else { /* COMMIT_WAMP */ 3694 } else { /* COMMIT_WAMP */
3705 3695
3706 /* free data extents covered by leaf */ 3696 /* free data extents covered by leaf */
3707 xadlock.xdlist = &p->xad[XTENTRYSTART]; 3697 xadlock.xdlist = &p->xad[XTENTRYSTART];
3708 xadlock.count = 3698 xadlock.count =
3709 le16_to_cpu(p->header.nextindex) - XTENTRYSTART; 3699 le16_to_cpu(p->header.nextindex) - XTENTRYSTART;
3710 txFreeMap(ip, (struct maplock *) & xadlock, NULL, COMMIT_WMAP); 3700 txFreeMap(ip, (struct maplock *) & xadlock, NULL, COMMIT_WMAP);
3711 } 3701 }
3712 3702
3713 if (p->header.flag & BT_ROOT) { 3703 if (p->header.flag & BT_ROOT) {
3714 p->header.flag &= ~BT_INTERNAL; 3704 p->header.flag &= ~BT_INTERNAL;
3715 p->header.flag |= BT_LEAF; 3705 p->header.flag |= BT_LEAF;
3716 p->header.nextindex = cpu_to_le16(XTENTRYSTART); 3706 p->header.nextindex = cpu_to_le16(XTENTRYSTART);
3717 3707
3718 XT_PUTPAGE(mp); /* debug */ 3708 XT_PUTPAGE(mp); /* debug */
3719 goto out; 3709 goto out;
3720 } else { 3710 } else {
3721 if (log) { /* COMMIT_PWMAP */ 3711 if (log) { /* COMMIT_PWMAP */
3722 /* page will be invalidated at tx completion 3712 /* page will be invalidated at tx completion
3723 */ 3713 */
3724 XT_PUTPAGE(mp); 3714 XT_PUTPAGE(mp);
3725 } else { /* COMMIT_WMAP */ 3715 } else { /* COMMIT_WMAP */
3726 3716
3727 if (mp->lid) 3717 if (mp->lid)
3728 lid_to_tlock(mp->lid)->flag |= tlckFREELOCK; 3718 lid_to_tlock(mp->lid)->flag |= tlckFREELOCK;
3729 3719
3730 /* invalidate empty leaf page */ 3720 /* invalidate empty leaf page */
3731 discard_metapage(mp); 3721 discard_metapage(mp);
3732 } 3722 }
3733 } 3723 }
3734 3724
3735 /* 3725 /*
3736 * the leaf page become empty: delete the parent entry 3726 * the leaf page become empty: delete the parent entry
3737 * for the leaf page if the parent page is to be kept 3727 * for the leaf page if the parent page is to be kept
3738 * in the new sized file. 3728 * in the new sized file.
3739 */ 3729 */
3740 3730
3741 /* 3731 /*
3742 * go back up to the parent page 3732 * go back up to the parent page
3743 */ 3733 */
3744 getParent: 3734 getParent:
3745 /* pop/restore parent entry for the current child page */ 3735 /* pop/restore parent entry for the current child page */
3746 if ((parent = BT_POP(&btstack)) == NULL) 3736 if ((parent = BT_POP(&btstack)) == NULL)
3747 /* current page must have been root */ 3737 /* current page must have been root */
3748 goto out; 3738 goto out;
3749 3739
3750 /* get back the parent page */ 3740 /* get back the parent page */
3751 bn = parent->bn; 3741 bn = parent->bn;
3752 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3742 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3753 if (rc) 3743 if (rc)
3754 return rc; 3744 return rc;
3755 3745
3756 index = parent->index; 3746 index = parent->index;
3757 3747
3758 /* 3748 /*
3759 * child page was not empty: 3749 * child page was not empty:
3760 */ 3750 */
3761 if (freed == 0) { 3751 if (freed == 0) {
3762 /* has any entry deleted from parent ? */ 3752 /* has any entry deleted from parent ? */
3763 if (index < le16_to_cpu(p->header.nextindex) - 1) { 3753 if (index < le16_to_cpu(p->header.nextindex) - 1) {
3764 /* (re)acquire tlock on the parent page */ 3754 /* (re)acquire tlock on the parent page */
3765 if (log) { /* COMMIT_PWMAP */ 3755 if (log) { /* COMMIT_PWMAP */
3766 /* txCommit() with tlckTRUNCATE: 3756 /* txCommit() with tlckTRUNCATE:
3767 * free child extents covered by parent [); 3757 * free child extents covered by parent [);
3768 */ 3758 */
3769 tlck = txLock(tid, ip, mp, tlckXTREE); 3759 tlck = txLock(tid, ip, mp, tlckXTREE);
3770 xtlck = (struct xtlock *) & tlck->lock; 3760 xtlck = (struct xtlock *) & tlck->lock;
3771 if (!(tlck->type & tlckTRUNCATE)) { 3761 if (!(tlck->type & tlckTRUNCATE)) {
3772 xtlck->hwm.offset = 3762 xtlck->hwm.offset =
3773 le16_to_cpu(p->header. 3763 le16_to_cpu(p->header.
3774 nextindex) - 1; 3764 nextindex) - 1;
3775 tlck->type = 3765 tlck->type =
3776 tlckXTREE | tlckTRUNCATE; 3766 tlckXTREE | tlckTRUNCATE;
3777 } 3767 }
3778 } else { /* COMMIT_WMAP */ 3768 } else { /* COMMIT_WMAP */
3779 3769
3780 /* free child extents covered by parent */ 3770 /* free child extents covered by parent */
3781 xadlock.xdlist = &p->xad[index + 1]; 3771 xadlock.xdlist = &p->xad[index + 1];
3782 xadlock.count = 3772 xadlock.count =
3783 le16_to_cpu(p->header.nextindex) - 3773 le16_to_cpu(p->header.nextindex) -
3784 index - 1; 3774 index - 1;
3785 txFreeMap(ip, (struct maplock *) & xadlock, 3775 txFreeMap(ip, (struct maplock *) & xadlock,
3786 NULL, COMMIT_WMAP); 3776 NULL, COMMIT_WMAP);
3787 } 3777 }
3788 BT_MARK_DIRTY(mp, ip); 3778 BT_MARK_DIRTY(mp, ip);
3789 3779
3790 p->header.nextindex = cpu_to_le16(index + 1); 3780 p->header.nextindex = cpu_to_le16(index + 1);
3791 } 3781 }
3792 XT_PUTPAGE(mp); 3782 XT_PUTPAGE(mp);
3793 goto getParent; 3783 goto getParent;
3794 } 3784 }
3795 3785
3796 /* 3786 /*
3797 * child page was empty: 3787 * child page was empty:
3798 */ 3788 */
3799 nfreed += lengthXAD(&p->xad[index]); 3789 nfreed += lengthXAD(&p->xad[index]);
3800 3790
3801 /* 3791 /*
3802 * During working map update, child page's tlock must be handled 3792 * During working map update, child page's tlock must be handled
3803 * before parent's. This is because the parent's tlock will cause 3793 * before parent's. This is because the parent's tlock will cause
3804 * the child's disk space to be marked available in the wmap, so 3794 * the child's disk space to be marked available in the wmap, so
3805 * it's important that the child page be released by that time. 3795 * it's important that the child page be released by that time.
3806 * 3796 *
3807 * ToDo: tlocks should be on doubly-linked list, so we can 3797 * ToDo: tlocks should be on doubly-linked list, so we can
3808 * quickly remove it and add it to the end. 3798 * quickly remove it and add it to the end.
3809 */ 3799 */
3810 3800
3811 /* 3801 /*
3812 * Move parent page's tlock to the end of the tid's tlock list 3802 * Move parent page's tlock to the end of the tid's tlock list
3813 */ 3803 */
3814 if (log && mp->lid && (tblk->last != mp->lid) && 3804 if (log && mp->lid && (tblk->last != mp->lid) &&
3815 lid_to_tlock(mp->lid)->tid) { 3805 lid_to_tlock(mp->lid)->tid) {
3816 lid_t lid = mp->lid; 3806 lid_t lid = mp->lid;
3817 struct tlock *prev; 3807 struct tlock *prev;
3818 3808
3819 tlck = lid_to_tlock(lid); 3809 tlck = lid_to_tlock(lid);
3820 3810
3821 if (tblk->next == lid) 3811 if (tblk->next == lid)
3822 tblk->next = tlck->next; 3812 tblk->next = tlck->next;
3823 else { 3813 else {
3824 for (prev = lid_to_tlock(tblk->next); 3814 for (prev = lid_to_tlock(tblk->next);
3825 prev->next != lid; 3815 prev->next != lid;
3826 prev = lid_to_tlock(prev->next)) { 3816 prev = lid_to_tlock(prev->next)) {
3827 assert(prev->next); 3817 assert(prev->next);
3828 } 3818 }
3829 prev->next = tlck->next; 3819 prev->next = tlck->next;
3830 } 3820 }
3831 lid_to_tlock(tblk->last)->next = lid; 3821 lid_to_tlock(tblk->last)->next = lid;
3832 tlck->next = 0; 3822 tlck->next = 0;
3833 tblk->last = lid; 3823 tblk->last = lid;
3834 } 3824 }
3835 3825
3836 /* 3826 /*
3837 * parent page become empty: free the page 3827 * parent page become empty: free the page
3838 */ 3828 */
3839 if (index == XTENTRYSTART) { 3829 if (index == XTENTRYSTART) {
3840 if (log) { /* COMMIT_PWMAP */ 3830 if (log) { /* COMMIT_PWMAP */
3841 /* txCommit() with tlckFREE: 3831 /* txCommit() with tlckFREE:
3842 * free child extents covered by parent; 3832 * free child extents covered by parent;
3843 * invalidate parent if COMMIT_PWMAP; 3833 * invalidate parent if COMMIT_PWMAP;
3844 */ 3834 */
3845 tlck = txLock(tid, ip, mp, tlckXTREE); 3835 tlck = txLock(tid, ip, mp, tlckXTREE);
3846 xtlck = (struct xtlock *) & tlck->lock; 3836 xtlck = (struct xtlock *) & tlck->lock;
3847 xtlck->hwm.offset = 3837 xtlck->hwm.offset =
3848 le16_to_cpu(p->header.nextindex) - 1; 3838 le16_to_cpu(p->header.nextindex) - 1;
3849 tlck->type = tlckXTREE | tlckFREE; 3839 tlck->type = tlckXTREE | tlckFREE;
3850 } else { /* COMMIT_WMAP */ 3840 } else { /* COMMIT_WMAP */
3851 3841
3852 /* free child extents covered by parent */ 3842 /* free child extents covered by parent */
3853 xadlock.xdlist = &p->xad[XTENTRYSTART]; 3843 xadlock.xdlist = &p->xad[XTENTRYSTART];
3854 xadlock.count = 3844 xadlock.count =
3855 le16_to_cpu(p->header.nextindex) - 3845 le16_to_cpu(p->header.nextindex) -
3856 XTENTRYSTART; 3846 XTENTRYSTART;
3857 txFreeMap(ip, (struct maplock *) & xadlock, NULL, 3847 txFreeMap(ip, (struct maplock *) & xadlock, NULL,
3858 COMMIT_WMAP); 3848 COMMIT_WMAP);
3859 } 3849 }
3860 BT_MARK_DIRTY(mp, ip); 3850 BT_MARK_DIRTY(mp, ip);
3861 3851
3862 if (p->header.flag & BT_ROOT) { 3852 if (p->header.flag & BT_ROOT) {
3863 p->header.flag &= ~BT_INTERNAL; 3853 p->header.flag &= ~BT_INTERNAL;
3864 p->header.flag |= BT_LEAF; 3854 p->header.flag |= BT_LEAF;
3865 p->header.nextindex = cpu_to_le16(XTENTRYSTART); 3855 p->header.nextindex = cpu_to_le16(XTENTRYSTART);
3866 if (le16_to_cpu(p->header.maxentry) == XTROOTMAXSLOT) { 3856 if (le16_to_cpu(p->header.maxentry) == XTROOTMAXSLOT) {
3867 /* 3857 /*
3868 * Shrink root down to allow inline 3858 * Shrink root down to allow inline
3869 * EA (otherwise fsck complains) 3859 * EA (otherwise fsck complains)
3870 */ 3860 */
3871 p->header.maxentry = 3861 p->header.maxentry =
3872 cpu_to_le16(XTROOTINITSLOT); 3862 cpu_to_le16(XTROOTINITSLOT);
3873 JFS_IP(ip)->mode2 |= INLINEEA; 3863 JFS_IP(ip)->mode2 |= INLINEEA;
3874 } 3864 }
3875 3865
3876 XT_PUTPAGE(mp); /* debug */ 3866 XT_PUTPAGE(mp); /* debug */
3877 goto out; 3867 goto out;
3878 } else { 3868 } else {
3879 if (log) { /* COMMIT_PWMAP */ 3869 if (log) { /* COMMIT_PWMAP */
3880 /* page will be invalidated at tx completion 3870 /* page will be invalidated at tx completion
3881 */ 3871 */
3882 XT_PUTPAGE(mp); 3872 XT_PUTPAGE(mp);
3883 } else { /* COMMIT_WMAP */ 3873 } else { /* COMMIT_WMAP */
3884 3874
3885 if (mp->lid) 3875 if (mp->lid)
3886 lid_to_tlock(mp->lid)->flag |= 3876 lid_to_tlock(mp->lid)->flag |=
3887 tlckFREELOCK; 3877 tlckFREELOCK;
3888 3878
3889 /* invalidate parent page */ 3879 /* invalidate parent page */
3890 discard_metapage(mp); 3880 discard_metapage(mp);
3891 } 3881 }
3892 3882
3893 /* parent has become empty and freed: 3883 /* parent has become empty and freed:
3894 * go back up to its parent page 3884 * go back up to its parent page
3895 */ 3885 */
3896 /* freed = 1; */ 3886 /* freed = 1; */
3897 goto getParent; 3887 goto getParent;
3898 } 3888 }
3899 } 3889 }
3900 /* 3890 /*
3901 * parent page still has entries for front region; 3891 * parent page still has entries for front region;
3902 */ 3892 */
3903 else { 3893 else {
3904 /* try truncate region covered by preceding entry 3894 /* try truncate region covered by preceding entry
3905 * (process backward) 3895 * (process backward)
3906 */ 3896 */
3907 index--; 3897 index--;
3908 3898
3909 /* go back down to the child page corresponding 3899 /* go back down to the child page corresponding
3910 * to the entry 3900 * to the entry
3911 */ 3901 */
3912 goto getChild; 3902 goto getChild;
3913 } 3903 }
3914 3904
3915 /* 3905 /*
3916 * internal page: go down to child page of current entry 3906 * internal page: go down to child page of current entry
3917 */ 3907 */
3918 getChild: 3908 getChild:
3919 /* save current parent entry for the child page */ 3909 /* save current parent entry for the child page */
3920 if (BT_STACK_FULL(&btstack)) { 3910 if (BT_STACK_FULL(&btstack)) {
3921 jfs_error(ip->i_sb, "stack overrun in xtTruncate!"); 3911 jfs_error(ip->i_sb, "stack overrun in xtTruncate!");
3922 XT_PUTPAGE(mp); 3912 XT_PUTPAGE(mp);
3923 return -EIO; 3913 return -EIO;
3924 } 3914 }
3925 BT_PUSH(&btstack, bn, index); 3915 BT_PUSH(&btstack, bn, index);
3926 3916
3927 /* get child page */ 3917 /* get child page */
3928 xad = &p->xad[index]; 3918 xad = &p->xad[index];
3929 bn = addressXAD(xad); 3919 bn = addressXAD(xad);
3930 3920
3931 /* 3921 /*
3932 * first access of each internal entry: 3922 * first access of each internal entry:
3933 */ 3923 */
3934 /* release parent page */ 3924 /* release parent page */
3935 XT_PUTPAGE(mp); 3925 XT_PUTPAGE(mp);
3936 3926
3937 /* process the child page */ 3927 /* process the child page */
3938 goto getPage; 3928 goto getPage;
3939 3929
3940 out: 3930 out:
3941 /* 3931 /*
3942 * update file resource stat 3932 * update file resource stat
3943 */ 3933 */
3944 /* set size 3934 /* set size
3945 */ 3935 */
3946 if (S_ISDIR(ip->i_mode) && !newsize) 3936 if (S_ISDIR(ip->i_mode) && !newsize)
3947 ip->i_size = 1; /* fsck hates zero-length directories */ 3937 ip->i_size = 1; /* fsck hates zero-length directories */
3948 else 3938 else
3949 ip->i_size = newsize; 3939 ip->i_size = newsize;
3950 3940
3951 /* update quota allocation to reflect freed blocks */ 3941 /* update quota allocation to reflect freed blocks */
3952 DQUOT_FREE_BLOCK(ip, nfreed); 3942 DQUOT_FREE_BLOCK(ip, nfreed);
3953 3943
3954 /* 3944 /*
3955 * free tlock of invalidated pages 3945 * free tlock of invalidated pages
3956 */ 3946 */
3957 if (flag == COMMIT_WMAP) 3947 if (flag == COMMIT_WMAP)
3958 txFreelock(ip); 3948 txFreelock(ip);
3959 3949
3960 return newsize; 3950 return newsize;
3961 } 3951 }
3962 3952
3963 3953
3964 /* 3954 /*
3965 * xtTruncate_pmap() 3955 * xtTruncate_pmap()
3966 * 3956 *
3967 * function: 3957 * function:
3968 * Perform truncate to zero length for deleted file, leaving the 3958 * Perform truncate to zero length for deleted file, leaving the
3969 * the xtree and working map untouched. This allows the file to 3959 * the xtree and working map untouched. This allows the file to
3970 * be accessed via open file handles, while the delete of the file 3960 * be accessed via open file handles, while the delete of the file
3971 * is committed to disk. 3961 * is committed to disk.
3972 * 3962 *
3973 * parameter: 3963 * parameter:
3974 * tid_t tid, 3964 * tid_t tid,
3975 * struct inode *ip, 3965 * struct inode *ip,
3976 * s64 committed_size) 3966 * s64 committed_size)
3977 * 3967 *
3978 * return: new committed size 3968 * return: new committed size
3979 * 3969 *
3980 * note: 3970 * note:
3981 * 3971 *
3982 * To avoid deadlock by holding too many transaction locks, the 3972 * To avoid deadlock by holding too many transaction locks, the
3983 * truncation may be broken up into multiple transactions. 3973 * truncation may be broken up into multiple transactions.
3984 * The committed_size keeps track of part of the file has been 3974 * The committed_size keeps track of part of the file has been
3985 * freed from the pmaps. 3975 * freed from the pmaps.
3986 */ 3976 */
3987 s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) 3977 s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
3988 { 3978 {
3989 s64 bn; 3979 s64 bn;
3990 struct btstack btstack; 3980 struct btstack btstack;
3991 int cmp; 3981 int cmp;
3992 int index; 3982 int index;
3993 int locked_leaves = 0; 3983 int locked_leaves = 0;
3994 struct metapage *mp; 3984 struct metapage *mp;
3995 xtpage_t *p; 3985 xtpage_t *p;
3996 struct btframe *parent; 3986 struct btframe *parent;
3997 int rc; 3987 int rc;
3998 struct tblock *tblk; 3988 struct tblock *tblk;
3999 struct tlock *tlck = NULL; 3989 struct tlock *tlck = NULL;
4000 xad_t *xad; 3990 xad_t *xad;
4001 int xlen; 3991 int xlen;
4002 s64 xoff; 3992 s64 xoff;
4003 struct xtlock *xtlck = NULL; 3993 struct xtlock *xtlck = NULL;
4004 3994
4005 /* save object truncation type */ 3995 /* save object truncation type */
4006 tblk = tid_to_tblock(tid); 3996 tblk = tid_to_tblock(tid);
4007 tblk->xflag |= COMMIT_PMAP; 3997 tblk->xflag |= COMMIT_PMAP;
4008 3998
4009 /* clear stack */ 3999 /* clear stack */
4010 BT_CLR(&btstack); 4000 BT_CLR(&btstack);
4011 4001
4012 if (committed_size) { 4002 if (committed_size) {
4013 xoff = (committed_size >> JFS_SBI(ip->i_sb)->l2bsize) - 1; 4003 xoff = (committed_size >> JFS_SBI(ip->i_sb)->l2bsize) - 1;
4014 rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0); 4004 rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0);
4015 if (rc) 4005 if (rc)
4016 return rc; 4006 return rc;
4017 4007
4018 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 4008 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
4019 4009
4020 if (cmp != 0) { 4010 if (cmp != 0) {
4021 XT_PUTPAGE(mp); 4011 XT_PUTPAGE(mp);
4022 jfs_error(ip->i_sb, 4012 jfs_error(ip->i_sb,
4023 "xtTruncate_pmap: did not find extent"); 4013 "xtTruncate_pmap: did not find extent");
4024 return -EIO; 4014 return -EIO;
4025 } 4015 }
4026 } else { 4016 } else {
4027 /* 4017 /*
4028 * start with root 4018 * start with root
4029 * 4019 *
4030 * root resides in the inode 4020 * root resides in the inode
4031 */ 4021 */
4032 bn = 0; 4022 bn = 0;
4033 4023
4034 /* 4024 /*
4035 * first access of each page: 4025 * first access of each page:
4036 */ 4026 */
4037 getPage: 4027 getPage:
4038 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 4028 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
4039 if (rc) 4029 if (rc)
4040 return rc; 4030 return rc;
4041 4031
4042 /* process entries backward from last index */ 4032 /* process entries backward from last index */
4043 index = le16_to_cpu(p->header.nextindex) - 1; 4033 index = le16_to_cpu(p->header.nextindex) - 1;
4044 4034
4045 if (p->header.flag & BT_INTERNAL) 4035 if (p->header.flag & BT_INTERNAL)
4046 goto getChild; 4036 goto getChild;
4047 } 4037 }
4048 4038
4049 /* 4039 /*
4050 * leaf page 4040 * leaf page
4051 */ 4041 */
4052 4042
4053 if (++locked_leaves > MAX_TRUNCATE_LEAVES) { 4043 if (++locked_leaves > MAX_TRUNCATE_LEAVES) {
4054 /* 4044 /*
4055 * We need to limit the size of the transaction 4045 * We need to limit the size of the transaction
4056 * to avoid exhausting pagecache & tlocks 4046 * to avoid exhausting pagecache & tlocks
4057 */ 4047 */
4058 xad = &p->xad[index]; 4048 xad = &p->xad[index];
4059 xoff = offsetXAD(xad); 4049 xoff = offsetXAD(xad);
4060 xlen = lengthXAD(xad); 4050 xlen = lengthXAD(xad);
4061 XT_PUTPAGE(mp); 4051 XT_PUTPAGE(mp);
4062 return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize; 4052 return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
4063 } 4053 }
4064 tlck = txLock(tid, ip, mp, tlckXTREE); 4054 tlck = txLock(tid, ip, mp, tlckXTREE);
4065 tlck->type = tlckXTREE | tlckFREE; 4055 tlck->type = tlckXTREE | tlckFREE;
4066 xtlck = (struct xtlock *) & tlck->lock; 4056 xtlck = (struct xtlock *) & tlck->lock;
4067 xtlck->hwm.offset = index; 4057 xtlck->hwm.offset = index;
4068 4058
4069 4059
4070 XT_PUTPAGE(mp); 4060 XT_PUTPAGE(mp);
4071 4061
4072 /* 4062 /*
4073 * go back up to the parent page 4063 * go back up to the parent page
4074 */ 4064 */
4075 getParent: 4065 getParent:
4076 /* pop/restore parent entry for the current child page */ 4066 /* pop/restore parent entry for the current child page */
4077 if ((parent = BT_POP(&btstack)) == NULL) 4067 if ((parent = BT_POP(&btstack)) == NULL)
4078 /* current page must have been root */ 4068 /* current page must have been root */
4079 goto out; 4069 goto out;
4080 4070
4081 /* get back the parent page */ 4071 /* get back the parent page */
4082 bn = parent->bn; 4072 bn = parent->bn;
4083 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 4073 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
4084 if (rc) 4074 if (rc)
4085 return rc; 4075 return rc;
4086 4076
4087 index = parent->index; 4077 index = parent->index;
4088 4078
4089 /* 4079 /*
4090 * parent page become empty: free the page 4080 * parent page become empty: free the page
4091 */ 4081 */
4092 if (index == XTENTRYSTART) { 4082 if (index == XTENTRYSTART) {
4093 /* txCommit() with tlckFREE: 4083 /* txCommit() with tlckFREE:
4094 * free child extents covered by parent; 4084 * free child extents covered by parent;
4095 * invalidate parent if COMMIT_PWMAP; 4085 * invalidate parent if COMMIT_PWMAP;
4096 */ 4086 */
4097 tlck = txLock(tid, ip, mp, tlckXTREE); 4087 tlck = txLock(tid, ip, mp, tlckXTREE);
4098 xtlck = (struct xtlock *) & tlck->lock; 4088 xtlck = (struct xtlock *) & tlck->lock;
4099 xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1; 4089 xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1;
4100 tlck->type = tlckXTREE | tlckFREE; 4090 tlck->type = tlckXTREE | tlckFREE;
4101 4091
4102 XT_PUTPAGE(mp); 4092 XT_PUTPAGE(mp);
4103 4093
4104 if (p->header.flag & BT_ROOT) { 4094 if (p->header.flag & BT_ROOT) {
4105 4095
4106 goto out; 4096 goto out;
4107 } else { 4097 } else {
4108 goto getParent; 4098 goto getParent;
4109 } 4099 }
4110 } 4100 }
4111 /* 4101 /*
4112 * parent page still has entries for front region; 4102 * parent page still has entries for front region;
4113 */ 4103 */
4114 else 4104 else
4115 index--; 4105 index--;
4116 /* 4106 /*
4117 * internal page: go down to child page of current entry 4107 * internal page: go down to child page of current entry
4118 */ 4108 */
4119 getChild: 4109 getChild:
4120 /* save current parent entry for the child page */ 4110 /* save current parent entry for the child page */
4121 if (BT_STACK_FULL(&btstack)) { 4111 if (BT_STACK_FULL(&btstack)) {
4122 jfs_error(ip->i_sb, "stack overrun in xtTruncate_pmap!"); 4112 jfs_error(ip->i_sb, "stack overrun in xtTruncate_pmap!");
4123 XT_PUTPAGE(mp); 4113 XT_PUTPAGE(mp);
4124 return -EIO; 4114 return -EIO;
4125 } 4115 }
4126 BT_PUSH(&btstack, bn, index); 4116 BT_PUSH(&btstack, bn, index);
4127 4117
4128 /* get child page */ 4118 /* get child page */
4129 xad = &p->xad[index]; 4119 xad = &p->xad[index];
4130 bn = addressXAD(xad); 4120 bn = addressXAD(xad);
4131 4121
4132 /* 4122 /*
4133 * first access of each internal entry: 4123 * first access of each internal entry:
4134 */ 4124 */
4135 /* release parent page */ 4125 /* release parent page */
4136 XT_PUTPAGE(mp); 4126 XT_PUTPAGE(mp);
4137 4127
4138 /* process the child page */ 4128 /* process the child page */
4139 goto getPage; 4129 goto getPage;
4140 4130
4141 out: 4131 out:
4142 4132
4143 return 0; 4133 return 0;
4144 } 4134 }
4145 4135
4146 #ifdef CONFIG_JFS_STATISTICS 4136 #ifdef CONFIG_JFS_STATISTICS
4147 int jfs_xtstat_read(char *buffer, char **start, off_t offset, int length, 4137 int jfs_xtstat_read(char *buffer, char **start, off_t offset, int length,
4148 int *eof, void *data) 4138 int *eof, void *data)
4149 { 4139 {
4150 int len = 0; 4140 int len = 0;
4151 off_t begin; 4141 off_t begin;
4152 4142
4153 len += sprintf(buffer, 4143 len += sprintf(buffer,
4154 "JFS Xtree statistics\n" 4144 "JFS Xtree statistics\n"
4155 "====================\n" 4145 "====================\n"
4156 "searches = %d\n" 4146 "searches = %d\n"
4157 "fast searches = %d\n" 4147 "fast searches = %d\n"
4158 "splits = %d\n", 4148 "splits = %d\n",
4159 xtStat.search, 4149 xtStat.search,
4160 xtStat.fastSearch, 4150 xtStat.fastSearch,
4161 xtStat.split); 4151 xtStat.split);
4162 4152
4163 begin = offset; 4153 begin = offset;
4164 *start = buffer + begin; 4154 *start = buffer + begin;
4165 len -= begin; 4155 len -= begin;
4166 4156
4167 if (len > length) 4157 if (len > length)
4168 len = length; 4158 len = length;
4169 else 4159 else
4170 *eof = 1; 4160 *eof = 1;
4171 4161
4172 if (len < 0) 4162 if (len < 0)
4173 len = 0; 4163 len = 0;
4174 4164
4175 return len; 4165 return len;
4176 } 4166 }
4177 #endif 4167 #endif
4178 4168