Commit 891456227881da9c565c455010380a40d385a478
Committed by
Dave Kleikamp
1 parent
96b5a46e2a
jfs: le*_add_cpu conversion
Replace all occurrences of: little_endian_variable = cpu_to_leX(leX_to_cpu(little_endian_variable) + expression_in_cpu_byteorder); with: leX_add_cpu(&little_endian_variable, expression_in_cpu_byteorder); (generated with a semantic patch). Signed-off-by: Marcin Slusarz <marcin.slusarz@gmail.com> Signed-off-by: Dave Kleikamp <shaggy@linux.vnet.ibm.com> Cc: jfs-discussion@lists.sourceforge.net
Showing 3 changed files with 19 additions and 33 deletions. (Inline Diff)
fs/jfs/jfs_dmap.c
1 | /* | 1 | /* |
2 | * Copyright (C) International Business Machines Corp., 2000-2004 | 2 | * Copyright (C) International Business Machines Corp., 2000-2004 |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify | 4 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License as published by | 5 | * it under the terms of the GNU General Public License as published by |
6 | * the Free Software Foundation; either version 2 of the License, or | 6 | * the Free Software Foundation; either version 2 of the License, or |
7 | * (at your option) any later version. | 7 | * (at your option) any later version. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it will be useful, | 9 | * This program is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
12 | * the GNU General Public License for more details. | 12 | * the GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write to the Free Software | 15 | * along with this program; if not, write to the Free Software |
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
17 | */ | 17 | */ |
18 | 18 | ||
19 | #include <linux/fs.h> | 19 | #include <linux/fs.h> |
20 | #include "jfs_incore.h" | 20 | #include "jfs_incore.h" |
21 | #include "jfs_superblock.h" | 21 | #include "jfs_superblock.h" |
22 | #include "jfs_dmap.h" | 22 | #include "jfs_dmap.h" |
23 | #include "jfs_imap.h" | 23 | #include "jfs_imap.h" |
24 | #include "jfs_lock.h" | 24 | #include "jfs_lock.h" |
25 | #include "jfs_metapage.h" | 25 | #include "jfs_metapage.h" |
26 | #include "jfs_debug.h" | 26 | #include "jfs_debug.h" |
27 | 27 | ||
28 | /* | 28 | /* |
29 | * SERIALIZATION of the Block Allocation Map. | 29 | * SERIALIZATION of the Block Allocation Map. |
30 | * | 30 | * |
31 | * the working state of the block allocation map is accessed in | 31 | * the working state of the block allocation map is accessed in |
32 | * two directions: | 32 | * two directions: |
33 | * | 33 | * |
34 | * 1) allocation and free requests that start at the dmap | 34 | * 1) allocation and free requests that start at the dmap |
35 | * level and move up through the dmap control pages (i.e. | 35 | * level and move up through the dmap control pages (i.e. |
36 | * the vast majority of requests). | 36 | * the vast majority of requests). |
37 | * | 37 | * |
38 | * 2) allocation requests that start at dmap control page | 38 | * 2) allocation requests that start at dmap control page |
39 | * level and work down towards the dmaps. | 39 | * level and work down towards the dmaps. |
40 | * | 40 | * |
41 | * the serialization scheme used here is as follows. | 41 | * the serialization scheme used here is as follows. |
42 | * | 42 | * |
43 | * requests which start at the bottom are serialized against each | 43 | * requests which start at the bottom are serialized against each |
44 | * other through buffers and each requests holds onto its buffers | 44 | * other through buffers and each requests holds onto its buffers |
45 | * as it works it way up from a single dmap to the required level | 45 | * as it works it way up from a single dmap to the required level |
46 | * of dmap control page. | 46 | * of dmap control page. |
47 | * requests that start at the top are serialized against each other | 47 | * requests that start at the top are serialized against each other |
48 | * and request that start from the bottom by the multiple read/single | 48 | * and request that start from the bottom by the multiple read/single |
49 | * write inode lock of the bmap inode. requests starting at the top | 49 | * write inode lock of the bmap inode. requests starting at the top |
50 | * take this lock in write mode while request starting at the bottom | 50 | * take this lock in write mode while request starting at the bottom |
51 | * take the lock in read mode. a single top-down request may proceed | 51 | * take the lock in read mode. a single top-down request may proceed |
52 | * exclusively while multiple bottoms-up requests may proceed | 52 | * exclusively while multiple bottoms-up requests may proceed |
53 | * simultaneously (under the protection of busy buffers). | 53 | * simultaneously (under the protection of busy buffers). |
54 | * | 54 | * |
55 | * in addition to information found in dmaps and dmap control pages, | 55 | * in addition to information found in dmaps and dmap control pages, |
56 | * the working state of the block allocation map also includes read/ | 56 | * the working state of the block allocation map also includes read/ |
57 | * write information maintained in the bmap descriptor (i.e. total | 57 | * write information maintained in the bmap descriptor (i.e. total |
58 | * free block count, allocation group level free block counts). | 58 | * free block count, allocation group level free block counts). |
59 | * a single exclusive lock (BMAP_LOCK) is used to guard this information | 59 | * a single exclusive lock (BMAP_LOCK) is used to guard this information |
60 | * in the face of multiple-bottoms up requests. | 60 | * in the face of multiple-bottoms up requests. |
61 | * (lock ordering: IREAD_LOCK, BMAP_LOCK); | 61 | * (lock ordering: IREAD_LOCK, BMAP_LOCK); |
62 | * | 62 | * |
63 | * accesses to the persistent state of the block allocation map (limited | 63 | * accesses to the persistent state of the block allocation map (limited |
64 | * to the persistent bitmaps in dmaps) is guarded by (busy) buffers. | 64 | * to the persistent bitmaps in dmaps) is guarded by (busy) buffers. |
65 | */ | 65 | */ |
66 | 66 | ||
67 | #define BMAP_LOCK_INIT(bmp) mutex_init(&bmp->db_bmaplock) | 67 | #define BMAP_LOCK_INIT(bmp) mutex_init(&bmp->db_bmaplock) |
68 | #define BMAP_LOCK(bmp) mutex_lock(&bmp->db_bmaplock) | 68 | #define BMAP_LOCK(bmp) mutex_lock(&bmp->db_bmaplock) |
69 | #define BMAP_UNLOCK(bmp) mutex_unlock(&bmp->db_bmaplock) | 69 | #define BMAP_UNLOCK(bmp) mutex_unlock(&bmp->db_bmaplock) |
70 | 70 | ||
71 | /* | 71 | /* |
72 | * forward references | 72 | * forward references |
73 | */ | 73 | */ |
74 | static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno, | 74 | static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno, |
75 | int nblocks); | 75 | int nblocks); |
76 | static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval); | 76 | static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval); |
77 | static int dbBackSplit(dmtree_t * tp, int leafno); | 77 | static int dbBackSplit(dmtree_t * tp, int leafno); |
78 | static int dbJoin(dmtree_t * tp, int leafno, int newval); | 78 | static int dbJoin(dmtree_t * tp, int leafno, int newval); |
79 | static void dbAdjTree(dmtree_t * tp, int leafno, int newval); | 79 | static void dbAdjTree(dmtree_t * tp, int leafno, int newval); |
80 | static int dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, | 80 | static int dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, |
81 | int level); | 81 | int level); |
82 | static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results); | 82 | static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results); |
83 | static int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno, | 83 | static int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno, |
84 | int nblocks); | 84 | int nblocks); |
85 | static int dbAllocNear(struct bmap * bmp, struct dmap * dp, s64 blkno, | 85 | static int dbAllocNear(struct bmap * bmp, struct dmap * dp, s64 blkno, |
86 | int nblocks, | 86 | int nblocks, |
87 | int l2nb, s64 * results); | 87 | int l2nb, s64 * results); |
88 | static int dbAllocDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, | 88 | static int dbAllocDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, |
89 | int nblocks); | 89 | int nblocks); |
90 | static int dbAllocDmapLev(struct bmap * bmp, struct dmap * dp, int nblocks, | 90 | static int dbAllocDmapLev(struct bmap * bmp, struct dmap * dp, int nblocks, |
91 | int l2nb, | 91 | int l2nb, |
92 | s64 * results); | 92 | s64 * results); |
93 | static int dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, | 93 | static int dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, |
94 | s64 * results); | 94 | s64 * results); |
95 | static int dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, | 95 | static int dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, |
96 | s64 * results); | 96 | s64 * results); |
97 | static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks); | 97 | static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks); |
98 | static int dbFindBits(u32 word, int l2nb); | 98 | static int dbFindBits(u32 word, int l2nb); |
99 | static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno); | 99 | static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno); |
100 | static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx); | 100 | static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx); |
101 | static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno, | 101 | static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno, |
102 | int nblocks); | 102 | int nblocks); |
103 | static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, | 103 | static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, |
104 | int nblocks); | 104 | int nblocks); |
105 | static int dbMaxBud(u8 * cp); | 105 | static int dbMaxBud(u8 * cp); |
106 | s64 dbMapFileSizeToMapSize(struct inode *ipbmap); | 106 | s64 dbMapFileSizeToMapSize(struct inode *ipbmap); |
107 | static int blkstol2(s64 nb); | 107 | static int blkstol2(s64 nb); |
108 | 108 | ||
109 | static int cntlz(u32 value); | 109 | static int cntlz(u32 value); |
110 | static int cnttz(u32 word); | 110 | static int cnttz(u32 word); |
111 | 111 | ||
112 | static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno, | 112 | static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno, |
113 | int nblocks); | 113 | int nblocks); |
114 | static int dbInitDmap(struct dmap * dp, s64 blkno, int nblocks); | 114 | static int dbInitDmap(struct dmap * dp, s64 blkno, int nblocks); |
115 | static int dbInitDmapTree(struct dmap * dp); | 115 | static int dbInitDmapTree(struct dmap * dp); |
116 | static int dbInitTree(struct dmaptree * dtp); | 116 | static int dbInitTree(struct dmaptree * dtp); |
117 | static int dbInitDmapCtl(struct dmapctl * dcp, int level, int i); | 117 | static int dbInitDmapCtl(struct dmapctl * dcp, int level, int i); |
118 | static int dbGetL2AGSize(s64 nblocks); | 118 | static int dbGetL2AGSize(s64 nblocks); |
119 | 119 | ||
120 | /* | 120 | /* |
121 | * buddy table | 121 | * buddy table |
122 | * | 122 | * |
123 | * table used for determining buddy sizes within characters of | 123 | * table used for determining buddy sizes within characters of |
124 | * dmap bitmap words. the characters themselves serve as indexes | 124 | * dmap bitmap words. the characters themselves serve as indexes |
125 | * into the table, with the table elements yielding the maximum | 125 | * into the table, with the table elements yielding the maximum |
126 | * binary buddy of free bits within the character. | 126 | * binary buddy of free bits within the character. |
127 | */ | 127 | */ |
128 | static const s8 budtab[256] = { | 128 | static const s8 budtab[256] = { |
129 | 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | 129 | 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
130 | 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 130 | 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
131 | 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 131 | 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
132 | 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 132 | 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
133 | 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 133 | 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
134 | 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, | 134 | 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, |
135 | 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, | 135 | 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, |
136 | 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, | 136 | 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, |
137 | 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 137 | 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
138 | 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, | 138 | 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, |
139 | 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, | 139 | 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, |
140 | 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, | 140 | 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, |
141 | 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 141 | 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
142 | 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, | 142 | 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, |
143 | 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, | 143 | 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, |
144 | 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, -1 | 144 | 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, -1 |
145 | }; | 145 | }; |
146 | 146 | ||
147 | 147 | ||
148 | /* | 148 | /* |
149 | * NAME: dbMount() | 149 | * NAME: dbMount() |
150 | * | 150 | * |
151 | * FUNCTION: initializate the block allocation map. | 151 | * FUNCTION: initializate the block allocation map. |
152 | * | 152 | * |
153 | * memory is allocated for the in-core bmap descriptor and | 153 | * memory is allocated for the in-core bmap descriptor and |
154 | * the in-core descriptor is initialized from disk. | 154 | * the in-core descriptor is initialized from disk. |
155 | * | 155 | * |
156 | * PARAMETERS: | 156 | * PARAMETERS: |
157 | * ipbmap - pointer to in-core inode for the block map. | 157 | * ipbmap - pointer to in-core inode for the block map. |
158 | * | 158 | * |
159 | * RETURN VALUES: | 159 | * RETURN VALUES: |
160 | * 0 - success | 160 | * 0 - success |
161 | * -ENOMEM - insufficient memory | 161 | * -ENOMEM - insufficient memory |
162 | * -EIO - i/o error | 162 | * -EIO - i/o error |
163 | */ | 163 | */ |
164 | int dbMount(struct inode *ipbmap) | 164 | int dbMount(struct inode *ipbmap) |
165 | { | 165 | { |
166 | struct bmap *bmp; | 166 | struct bmap *bmp; |
167 | struct dbmap_disk *dbmp_le; | 167 | struct dbmap_disk *dbmp_le; |
168 | struct metapage *mp; | 168 | struct metapage *mp; |
169 | int i; | 169 | int i; |
170 | 170 | ||
171 | /* | 171 | /* |
172 | * allocate/initialize the in-memory bmap descriptor | 172 | * allocate/initialize the in-memory bmap descriptor |
173 | */ | 173 | */ |
174 | /* allocate memory for the in-memory bmap descriptor */ | 174 | /* allocate memory for the in-memory bmap descriptor */ |
175 | bmp = kmalloc(sizeof(struct bmap), GFP_KERNEL); | 175 | bmp = kmalloc(sizeof(struct bmap), GFP_KERNEL); |
176 | if (bmp == NULL) | 176 | if (bmp == NULL) |
177 | return -ENOMEM; | 177 | return -ENOMEM; |
178 | 178 | ||
179 | /* read the on-disk bmap descriptor. */ | 179 | /* read the on-disk bmap descriptor. */ |
180 | mp = read_metapage(ipbmap, | 180 | mp = read_metapage(ipbmap, |
181 | BMAPBLKNO << JFS_SBI(ipbmap->i_sb)->l2nbperpage, | 181 | BMAPBLKNO << JFS_SBI(ipbmap->i_sb)->l2nbperpage, |
182 | PSIZE, 0); | 182 | PSIZE, 0); |
183 | if (mp == NULL) { | 183 | if (mp == NULL) { |
184 | kfree(bmp); | 184 | kfree(bmp); |
185 | return -EIO; | 185 | return -EIO; |
186 | } | 186 | } |
187 | 187 | ||
188 | /* copy the on-disk bmap descriptor to its in-memory version. */ | 188 | /* copy the on-disk bmap descriptor to its in-memory version. */ |
189 | dbmp_le = (struct dbmap_disk *) mp->data; | 189 | dbmp_le = (struct dbmap_disk *) mp->data; |
190 | bmp->db_mapsize = le64_to_cpu(dbmp_le->dn_mapsize); | 190 | bmp->db_mapsize = le64_to_cpu(dbmp_le->dn_mapsize); |
191 | bmp->db_nfree = le64_to_cpu(dbmp_le->dn_nfree); | 191 | bmp->db_nfree = le64_to_cpu(dbmp_le->dn_nfree); |
192 | bmp->db_l2nbperpage = le32_to_cpu(dbmp_le->dn_l2nbperpage); | 192 | bmp->db_l2nbperpage = le32_to_cpu(dbmp_le->dn_l2nbperpage); |
193 | bmp->db_numag = le32_to_cpu(dbmp_le->dn_numag); | 193 | bmp->db_numag = le32_to_cpu(dbmp_le->dn_numag); |
194 | bmp->db_maxlevel = le32_to_cpu(dbmp_le->dn_maxlevel); | 194 | bmp->db_maxlevel = le32_to_cpu(dbmp_le->dn_maxlevel); |
195 | bmp->db_maxag = le32_to_cpu(dbmp_le->dn_maxag); | 195 | bmp->db_maxag = le32_to_cpu(dbmp_le->dn_maxag); |
196 | bmp->db_agpref = le32_to_cpu(dbmp_le->dn_agpref); | 196 | bmp->db_agpref = le32_to_cpu(dbmp_le->dn_agpref); |
197 | bmp->db_aglevel = le32_to_cpu(dbmp_le->dn_aglevel); | 197 | bmp->db_aglevel = le32_to_cpu(dbmp_le->dn_aglevel); |
198 | bmp->db_agheigth = le32_to_cpu(dbmp_le->dn_agheigth); | 198 | bmp->db_agheigth = le32_to_cpu(dbmp_le->dn_agheigth); |
199 | bmp->db_agwidth = le32_to_cpu(dbmp_le->dn_agwidth); | 199 | bmp->db_agwidth = le32_to_cpu(dbmp_le->dn_agwidth); |
200 | bmp->db_agstart = le32_to_cpu(dbmp_le->dn_agstart); | 200 | bmp->db_agstart = le32_to_cpu(dbmp_le->dn_agstart); |
201 | bmp->db_agl2size = le32_to_cpu(dbmp_le->dn_agl2size); | 201 | bmp->db_agl2size = le32_to_cpu(dbmp_le->dn_agl2size); |
202 | for (i = 0; i < MAXAG; i++) | 202 | for (i = 0; i < MAXAG; i++) |
203 | bmp->db_agfree[i] = le64_to_cpu(dbmp_le->dn_agfree[i]); | 203 | bmp->db_agfree[i] = le64_to_cpu(dbmp_le->dn_agfree[i]); |
204 | bmp->db_agsize = le64_to_cpu(dbmp_le->dn_agsize); | 204 | bmp->db_agsize = le64_to_cpu(dbmp_le->dn_agsize); |
205 | bmp->db_maxfreebud = dbmp_le->dn_maxfreebud; | 205 | bmp->db_maxfreebud = dbmp_le->dn_maxfreebud; |
206 | 206 | ||
207 | /* release the buffer. */ | 207 | /* release the buffer. */ |
208 | release_metapage(mp); | 208 | release_metapage(mp); |
209 | 209 | ||
210 | /* bind the bmap inode and the bmap descriptor to each other. */ | 210 | /* bind the bmap inode and the bmap descriptor to each other. */ |
211 | bmp->db_ipbmap = ipbmap; | 211 | bmp->db_ipbmap = ipbmap; |
212 | JFS_SBI(ipbmap->i_sb)->bmap = bmp; | 212 | JFS_SBI(ipbmap->i_sb)->bmap = bmp; |
213 | 213 | ||
214 | memset(bmp->db_active, 0, sizeof(bmp->db_active)); | 214 | memset(bmp->db_active, 0, sizeof(bmp->db_active)); |
215 | 215 | ||
216 | /* | 216 | /* |
217 | * allocate/initialize the bmap lock | 217 | * allocate/initialize the bmap lock |
218 | */ | 218 | */ |
219 | BMAP_LOCK_INIT(bmp); | 219 | BMAP_LOCK_INIT(bmp); |
220 | 220 | ||
221 | return (0); | 221 | return (0); |
222 | } | 222 | } |
223 | 223 | ||
224 | 224 | ||
225 | /* | 225 | /* |
226 | * NAME: dbUnmount() | 226 | * NAME: dbUnmount() |
227 | * | 227 | * |
228 | * FUNCTION: terminate the block allocation map in preparation for | 228 | * FUNCTION: terminate the block allocation map in preparation for |
229 | * file system unmount. | 229 | * file system unmount. |
230 | * | 230 | * |
231 | * the in-core bmap descriptor is written to disk and | 231 | * the in-core bmap descriptor is written to disk and |
232 | * the memory for this descriptor is freed. | 232 | * the memory for this descriptor is freed. |
233 | * | 233 | * |
234 | * PARAMETERS: | 234 | * PARAMETERS: |
235 | * ipbmap - pointer to in-core inode for the block map. | 235 | * ipbmap - pointer to in-core inode for the block map. |
236 | * | 236 | * |
237 | * RETURN VALUES: | 237 | * RETURN VALUES: |
238 | * 0 - success | 238 | * 0 - success |
239 | * -EIO - i/o error | 239 | * -EIO - i/o error |
240 | */ | 240 | */ |
241 | int dbUnmount(struct inode *ipbmap, int mounterror) | 241 | int dbUnmount(struct inode *ipbmap, int mounterror) |
242 | { | 242 | { |
243 | struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap; | 243 | struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap; |
244 | 244 | ||
245 | if (!(mounterror || isReadOnly(ipbmap))) | 245 | if (!(mounterror || isReadOnly(ipbmap))) |
246 | dbSync(ipbmap); | 246 | dbSync(ipbmap); |
247 | 247 | ||
248 | /* | 248 | /* |
249 | * Invalidate the page cache buffers | 249 | * Invalidate the page cache buffers |
250 | */ | 250 | */ |
251 | truncate_inode_pages(ipbmap->i_mapping, 0); | 251 | truncate_inode_pages(ipbmap->i_mapping, 0); |
252 | 252 | ||
253 | /* free the memory for the in-memory bmap. */ | 253 | /* free the memory for the in-memory bmap. */ |
254 | kfree(bmp); | 254 | kfree(bmp); |
255 | 255 | ||
256 | return (0); | 256 | return (0); |
257 | } | 257 | } |
258 | 258 | ||
259 | /* | 259 | /* |
260 | * dbSync() | 260 | * dbSync() |
261 | */ | 261 | */ |
262 | int dbSync(struct inode *ipbmap) | 262 | int dbSync(struct inode *ipbmap) |
263 | { | 263 | { |
264 | struct dbmap_disk *dbmp_le; | 264 | struct dbmap_disk *dbmp_le; |
265 | struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap; | 265 | struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap; |
266 | struct metapage *mp; | 266 | struct metapage *mp; |
267 | int i; | 267 | int i; |
268 | 268 | ||
269 | /* | 269 | /* |
270 | * write bmap global control page | 270 | * write bmap global control page |
271 | */ | 271 | */ |
272 | /* get the buffer for the on-disk bmap descriptor. */ | 272 | /* get the buffer for the on-disk bmap descriptor. */ |
273 | mp = read_metapage(ipbmap, | 273 | mp = read_metapage(ipbmap, |
274 | BMAPBLKNO << JFS_SBI(ipbmap->i_sb)->l2nbperpage, | 274 | BMAPBLKNO << JFS_SBI(ipbmap->i_sb)->l2nbperpage, |
275 | PSIZE, 0); | 275 | PSIZE, 0); |
276 | if (mp == NULL) { | 276 | if (mp == NULL) { |
277 | jfs_err("dbSync: read_metapage failed!"); | 277 | jfs_err("dbSync: read_metapage failed!"); |
278 | return -EIO; | 278 | return -EIO; |
279 | } | 279 | } |
280 | /* copy the in-memory version of the bmap to the on-disk version */ | 280 | /* copy the in-memory version of the bmap to the on-disk version */ |
281 | dbmp_le = (struct dbmap_disk *) mp->data; | 281 | dbmp_le = (struct dbmap_disk *) mp->data; |
282 | dbmp_le->dn_mapsize = cpu_to_le64(bmp->db_mapsize); | 282 | dbmp_le->dn_mapsize = cpu_to_le64(bmp->db_mapsize); |
283 | dbmp_le->dn_nfree = cpu_to_le64(bmp->db_nfree); | 283 | dbmp_le->dn_nfree = cpu_to_le64(bmp->db_nfree); |
284 | dbmp_le->dn_l2nbperpage = cpu_to_le32(bmp->db_l2nbperpage); | 284 | dbmp_le->dn_l2nbperpage = cpu_to_le32(bmp->db_l2nbperpage); |
285 | dbmp_le->dn_numag = cpu_to_le32(bmp->db_numag); | 285 | dbmp_le->dn_numag = cpu_to_le32(bmp->db_numag); |
286 | dbmp_le->dn_maxlevel = cpu_to_le32(bmp->db_maxlevel); | 286 | dbmp_le->dn_maxlevel = cpu_to_le32(bmp->db_maxlevel); |
287 | dbmp_le->dn_maxag = cpu_to_le32(bmp->db_maxag); | 287 | dbmp_le->dn_maxag = cpu_to_le32(bmp->db_maxag); |
288 | dbmp_le->dn_agpref = cpu_to_le32(bmp->db_agpref); | 288 | dbmp_le->dn_agpref = cpu_to_le32(bmp->db_agpref); |
289 | dbmp_le->dn_aglevel = cpu_to_le32(bmp->db_aglevel); | 289 | dbmp_le->dn_aglevel = cpu_to_le32(bmp->db_aglevel); |
290 | dbmp_le->dn_agheigth = cpu_to_le32(bmp->db_agheigth); | 290 | dbmp_le->dn_agheigth = cpu_to_le32(bmp->db_agheigth); |
291 | dbmp_le->dn_agwidth = cpu_to_le32(bmp->db_agwidth); | 291 | dbmp_le->dn_agwidth = cpu_to_le32(bmp->db_agwidth); |
292 | dbmp_le->dn_agstart = cpu_to_le32(bmp->db_agstart); | 292 | dbmp_le->dn_agstart = cpu_to_le32(bmp->db_agstart); |
293 | dbmp_le->dn_agl2size = cpu_to_le32(bmp->db_agl2size); | 293 | dbmp_le->dn_agl2size = cpu_to_le32(bmp->db_agl2size); |
294 | for (i = 0; i < MAXAG; i++) | 294 | for (i = 0; i < MAXAG; i++) |
295 | dbmp_le->dn_agfree[i] = cpu_to_le64(bmp->db_agfree[i]); | 295 | dbmp_le->dn_agfree[i] = cpu_to_le64(bmp->db_agfree[i]); |
296 | dbmp_le->dn_agsize = cpu_to_le64(bmp->db_agsize); | 296 | dbmp_le->dn_agsize = cpu_to_le64(bmp->db_agsize); |
297 | dbmp_le->dn_maxfreebud = bmp->db_maxfreebud; | 297 | dbmp_le->dn_maxfreebud = bmp->db_maxfreebud; |
298 | 298 | ||
299 | /* write the buffer */ | 299 | /* write the buffer */ |
300 | write_metapage(mp); | 300 | write_metapage(mp); |
301 | 301 | ||
302 | /* | 302 | /* |
303 | * write out dirty pages of bmap | 303 | * write out dirty pages of bmap |
304 | */ | 304 | */ |
305 | filemap_write_and_wait(ipbmap->i_mapping); | 305 | filemap_write_and_wait(ipbmap->i_mapping); |
306 | 306 | ||
307 | diWriteSpecial(ipbmap, 0); | 307 | diWriteSpecial(ipbmap, 0); |
308 | 308 | ||
309 | return (0); | 309 | return (0); |
310 | } | 310 | } |
311 | 311 | ||
312 | 312 | ||
313 | /* | 313 | /* |
314 | * NAME: dbFree() | 314 | * NAME: dbFree() |
315 | * | 315 | * |
316 | * FUNCTION: free the specified block range from the working block | 316 | * FUNCTION: free the specified block range from the working block |
317 | * allocation map. | 317 | * allocation map. |
318 | * | 318 | * |
319 | * the blocks will be free from the working map one dmap | 319 | * the blocks will be free from the working map one dmap |
320 | * at a time. | 320 | * at a time. |
321 | * | 321 | * |
322 | * PARAMETERS: | 322 | * PARAMETERS: |
323 | * ip - pointer to in-core inode; | 323 | * ip - pointer to in-core inode; |
324 | * blkno - starting block number to be freed. | 324 | * blkno - starting block number to be freed. |
325 | * nblocks - number of blocks to be freed. | 325 | * nblocks - number of blocks to be freed. |
326 | * | 326 | * |
327 | * RETURN VALUES: | 327 | * RETURN VALUES: |
328 | * 0 - success | 328 | * 0 - success |
329 | * -EIO - i/o error | 329 | * -EIO - i/o error |
330 | */ | 330 | */ |
331 | int dbFree(struct inode *ip, s64 blkno, s64 nblocks) | 331 | int dbFree(struct inode *ip, s64 blkno, s64 nblocks) |
332 | { | 332 | { |
333 | struct metapage *mp; | 333 | struct metapage *mp; |
334 | struct dmap *dp; | 334 | struct dmap *dp; |
335 | int nb, rc; | 335 | int nb, rc; |
336 | s64 lblkno, rem; | 336 | s64 lblkno, rem; |
337 | struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; | 337 | struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; |
338 | struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap; | 338 | struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap; |
339 | 339 | ||
340 | IREAD_LOCK(ipbmap, RDWRLOCK_DMAP); | 340 | IREAD_LOCK(ipbmap, RDWRLOCK_DMAP); |
341 | 341 | ||
342 | /* block to be freed better be within the mapsize. */ | 342 | /* block to be freed better be within the mapsize. */ |
343 | if (unlikely((blkno == 0) || (blkno + nblocks > bmp->db_mapsize))) { | 343 | if (unlikely((blkno == 0) || (blkno + nblocks > bmp->db_mapsize))) { |
344 | IREAD_UNLOCK(ipbmap); | 344 | IREAD_UNLOCK(ipbmap); |
345 | printk(KERN_ERR "blkno = %Lx, nblocks = %Lx\n", | 345 | printk(KERN_ERR "blkno = %Lx, nblocks = %Lx\n", |
346 | (unsigned long long) blkno, | 346 | (unsigned long long) blkno, |
347 | (unsigned long long) nblocks); | 347 | (unsigned long long) nblocks); |
348 | jfs_error(ip->i_sb, | 348 | jfs_error(ip->i_sb, |
349 | "dbFree: block to be freed is outside the map"); | 349 | "dbFree: block to be freed is outside the map"); |
350 | return -EIO; | 350 | return -EIO; |
351 | } | 351 | } |
352 | 352 | ||
353 | /* | 353 | /* |
354 | * free the blocks a dmap at a time. | 354 | * free the blocks a dmap at a time. |
355 | */ | 355 | */ |
356 | mp = NULL; | 356 | mp = NULL; |
357 | for (rem = nblocks; rem > 0; rem -= nb, blkno += nb) { | 357 | for (rem = nblocks; rem > 0; rem -= nb, blkno += nb) { |
358 | /* release previous dmap if any */ | 358 | /* release previous dmap if any */ |
359 | if (mp) { | 359 | if (mp) { |
360 | write_metapage(mp); | 360 | write_metapage(mp); |
361 | } | 361 | } |
362 | 362 | ||
363 | /* get the buffer for the current dmap. */ | 363 | /* get the buffer for the current dmap. */ |
364 | lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage); | 364 | lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage); |
365 | mp = read_metapage(ipbmap, lblkno, PSIZE, 0); | 365 | mp = read_metapage(ipbmap, lblkno, PSIZE, 0); |
366 | if (mp == NULL) { | 366 | if (mp == NULL) { |
367 | IREAD_UNLOCK(ipbmap); | 367 | IREAD_UNLOCK(ipbmap); |
368 | return -EIO; | 368 | return -EIO; |
369 | } | 369 | } |
370 | dp = (struct dmap *) mp->data; | 370 | dp = (struct dmap *) mp->data; |
371 | 371 | ||
372 | /* determine the number of blocks to be freed from | 372 | /* determine the number of blocks to be freed from |
373 | * this dmap. | 373 | * this dmap. |
374 | */ | 374 | */ |
375 | nb = min(rem, BPERDMAP - (blkno & (BPERDMAP - 1))); | 375 | nb = min(rem, BPERDMAP - (blkno & (BPERDMAP - 1))); |
376 | 376 | ||
377 | /* free the blocks. */ | 377 | /* free the blocks. */ |
378 | if ((rc = dbFreeDmap(bmp, dp, blkno, nb))) { | 378 | if ((rc = dbFreeDmap(bmp, dp, blkno, nb))) { |
379 | jfs_error(ip->i_sb, "dbFree: error in block map\n"); | 379 | jfs_error(ip->i_sb, "dbFree: error in block map\n"); |
380 | release_metapage(mp); | 380 | release_metapage(mp); |
381 | IREAD_UNLOCK(ipbmap); | 381 | IREAD_UNLOCK(ipbmap); |
382 | return (rc); | 382 | return (rc); |
383 | } | 383 | } |
384 | } | 384 | } |
385 | 385 | ||
386 | /* write the last buffer. */ | 386 | /* write the last buffer. */ |
387 | write_metapage(mp); | 387 | write_metapage(mp); |
388 | 388 | ||
389 | IREAD_UNLOCK(ipbmap); | 389 | IREAD_UNLOCK(ipbmap); |
390 | 390 | ||
391 | return (0); | 391 | return (0); |
392 | } | 392 | } |
393 | 393 | ||
394 | 394 | ||
395 | /* | 395 | /* |
396 | * NAME: dbUpdatePMap() | 396 | * NAME: dbUpdatePMap() |
397 | * | 397 | * |
398 | * FUNCTION: update the allocation state (free or allocate) of the | 398 | * FUNCTION: update the allocation state (free or allocate) of the |
399 | * specified block range in the persistent block allocation map. | 399 | * specified block range in the persistent block allocation map. |
400 | * | 400 | * |
401 | * the blocks will be updated in the persistent map one | 401 | * the blocks will be updated in the persistent map one |
402 | * dmap at a time. | 402 | * dmap at a time. |
403 | * | 403 | * |
404 | * PARAMETERS: | 404 | * PARAMETERS: |
405 | * ipbmap - pointer to in-core inode for the block map. | 405 | * ipbmap - pointer to in-core inode for the block map. |
406 | * free - 'true' if block range is to be freed from the persistent | 406 | * free - 'true' if block range is to be freed from the persistent |
407 | * map; 'false' if it is to be allocated. | 407 | * map; 'false' if it is to be allocated. |
408 | * blkno - starting block number of the range. | 408 | * blkno - starting block number of the range. |
409 | * nblocks - number of contiguous blocks in the range. | 409 | * nblocks - number of contiguous blocks in the range. |
410 | * tblk - transaction block; | 410 | * tblk - transaction block; |
411 | * | 411 | * |
412 | * RETURN VALUES: | 412 | * RETURN VALUES: |
413 | * 0 - success | 413 | * 0 - success |
414 | * -EIO - i/o error | 414 | * -EIO - i/o error |
415 | */ | 415 | */ |
/*
 * Walks the persistent map (pmap) one dmap page at a time, setting or
 * clearing the bits for [blkno, blkno + nblocks) and threading each dirty
 * metapage onto the transaction's log-sync list so it is not written to
 * disk before the log record that describes the change.
 * NOTE(review): caller is presumed to already hold the appropriate map
 * lock for this range — TODO confirm against callers.
 */
int
dbUpdatePMap(struct inode *ipbmap,
	     int free, s64 blkno, s64 nblocks, struct tblock * tblk)
{
	int nblks, dbitno, wbitno, rbits;
	int word, nbits, nwords;
	struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap;
	s64 lblkno, rem, lastlblkno;
	u32 mask;
	struct dmap *dp;
	struct metapage *mp;
	struct jfs_log *log;
	int lsn, difft, diffp;
	unsigned long flags;

	/* the blocks better be within the mapsize. */
	if (blkno + nblocks > bmp->db_mapsize) {
		printk(KERN_ERR "blkno = %Lx, nblocks = %Lx\n",
		       (unsigned long long) blkno,
		       (unsigned long long) nblocks);
		jfs_error(ipbmap->i_sb,
			  "dbUpdatePMap: blocks are outside the map");
		return -EIO;
	}

	/* compute delta of transaction lsn from log syncpt */
	lsn = tblk->lsn;
	log = (struct jfs_log *) JFS_SBI(tblk->sb)->log;
	logdiff(difft, lsn, log);

	/*
	 * update the block state a dmap at a time.
	 */
	mp = NULL;
	lastlblkno = 0;
	for (rem = nblocks; rem > 0; rem -= nblks, blkno += nblks) {
		/* get the buffer for the current dmap. */
		lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage);
		if (lblkno != lastlblkno) {
			/* crossed into a new dmap page: flush the previous
			 * one before picking up the next.
			 */
			if (mp) {
				write_metapage(mp);
			}

			mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE,
					   0);
			if (mp == NULL)
				return -EIO;
			metapage_wait_for_io(mp);
		}
		dp = (struct dmap *) mp->data;

		/* determine the bit number and word within the dmap of
		 * the starting block.  also determine how many blocks
		 * are to be updated within this dmap.
		 */
		dbitno = blkno & (BPERDMAP - 1);
		word = dbitno >> L2DBWORD;
		nblks = min(rem, (s64)BPERDMAP - dbitno);

		/* update the bits of the dmap words. the first and last
		 * words may only have a subset of their bits updated. if
		 * this is the case, we'll work against that word (i.e.
		 * partial first and/or last) only in a single pass.  a
		 * single pass will also be used to update all words that
		 * are to have all their bits updated.
		 */
		for (rbits = nblks; rbits > 0;
		     rbits -= nbits, dbitno += nbits) {
			/* determine the bit number within the word and
			 * the number of bits within the word.
			 */
			wbitno = dbitno & (DBWORD - 1);
			nbits = min(rbits, DBWORD - wbitno);

			/* check if only part of the word is to be updated. */
			if (nbits < DBWORD) {
				/* update (free or allocate) the bits
				 * in this word.  mask covers nbits bits
				 * starting at bit wbitno (MSB-first).
				 */
				mask =
				    (ONES << (DBWORD - nbits) >> wbitno);
				if (free)
					dp->pmap[word] &=
					    cpu_to_le32(~mask);
				else
					dp->pmap[word] |=
					    cpu_to_le32(mask);

				word += 1;
			} else {
				/* one or more words are to have all
				 * their bits updated.  determine how
				 * many words and how many bits.
				 */
				nwords = rbits >> L2DBWORD;
				nbits = nwords << L2DBWORD;

				/* update (free or allocate) the bits
				 * in these words.  memset with (int) ONES
				 * writes the low byte 0xff to every byte,
				 * i.e. all-ones words, endianness-neutral.
				 */
				if (free)
					memset(&dp->pmap[word], 0,
					       nwords * 4);
				else
					memset(&dp->pmap[word], (int) ONES,
					       nwords * 4);

				word += nwords;
			}
		}

		/*
		 * update dmap lsn — only once per dmap page; skip if we
		 * are still on the same page as the previous pass.
		 */
		if (lblkno == lastlblkno)
			continue;

		lastlblkno = lblkno;

		LOGSYNC_LOCK(log, flags);
		if (mp->lsn != 0) {
			/* inherit older/smaller lsn: the page must stay
			 * behind the oldest log record that touches it.
			 */
			logdiff(diffp, mp->lsn, log);
			if (difft < diffp) {
				mp->lsn = lsn;

				/* move bp after tblock in logsync list */
				list_move(&mp->synclist, &tblk->synclist);
			}

			/* inherit younger/larger clsn */
			logdiff(difft, tblk->clsn, log);
			logdiff(diffp, mp->clsn, log);
			if (difft > diffp)
				mp->clsn = tblk->clsn;
		} else {
			/* page not yet on any logsync list: adopt this
			 * transaction's lsn/clsn and insert it.
			 */
			mp->log = log;
			mp->lsn = lsn;

			/* insert bp after tblock in logsync list */
			log->count++;
			list_add(&mp->synclist, &tblk->synclist);

			mp->clsn = tblk->clsn;
		}
		LOGSYNC_UNLOCK(log, flags);
	}

	/* write the last buffer. */
	if (mp) {
		write_metapage(mp);
	}

	return (0);
}
571 | 571 | ||
572 | 572 | ||
573 | /* | 573 | /* |
574 | * NAME: dbNextAG() | 574 | * NAME: dbNextAG() |
575 | * | 575 | * |
576 | * FUNCTION: find the preferred allocation group for new allocations. | 576 | * FUNCTION: find the preferred allocation group for new allocations. |
577 | * | 577 | * |
578 | * Within the allocation groups, we maintain a preferred | 578 | * Within the allocation groups, we maintain a preferred |
579 | * allocation group which consists of a group with at least | 579 | * allocation group which consists of a group with at least |
580 | * average free space. It is the preferred group that we target | 580 | * average free space. It is the preferred group that we target |
581 | * new inode allocation towards. The tie-in between inode | 581 | * new inode allocation towards. The tie-in between inode |
582 | * allocation and block allocation occurs as we allocate the | 582 | * allocation and block allocation occurs as we allocate the |
583 | * first (data) block of an inode and specify the inode (block) | 583 | * first (data) block of an inode and specify the inode (block) |
584 | * as the allocation hint for this block. | 584 | * as the allocation hint for this block. |
585 | * | 585 | * |
586 | * We try to avoid having more than one open file growing in | 586 | * We try to avoid having more than one open file growing in |
587 | * an allocation group, as this will lead to fragmentation. | 587 | * an allocation group, as this will lead to fragmentation. |
588 | * This differs from the old OS/2 method of trying to keep | 588 | * This differs from the old OS/2 method of trying to keep |
589 | * empty ags around for large allocations. | 589 | * empty ags around for large allocations. |
590 | * | 590 | * |
591 | * PARAMETERS: | 591 | * PARAMETERS: |
592 | * ipbmap - pointer to in-core inode for the block map. | 592 | * ipbmap - pointer to in-core inode for the block map. |
593 | * | 593 | * |
594 | * RETURN VALUES: | 594 | * RETURN VALUES: |
595 | * the preferred allocation group number. | 595 | * the preferred allocation group number. |
596 | */ | 596 | */ |
597 | int dbNextAG(struct inode *ipbmap) | 597 | int dbNextAG(struct inode *ipbmap) |
598 | { | 598 | { |
599 | s64 avgfree; | 599 | s64 avgfree; |
600 | int agpref; | 600 | int agpref; |
601 | s64 hwm = 0; | 601 | s64 hwm = 0; |
602 | int i; | 602 | int i; |
603 | int next_best = -1; | 603 | int next_best = -1; |
604 | struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap; | 604 | struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap; |
605 | 605 | ||
606 | BMAP_LOCK(bmp); | 606 | BMAP_LOCK(bmp); |
607 | 607 | ||
608 | /* determine the average number of free blocks within the ags. */ | 608 | /* determine the average number of free blocks within the ags. */ |
609 | avgfree = (u32)bmp->db_nfree / bmp->db_numag; | 609 | avgfree = (u32)bmp->db_nfree / bmp->db_numag; |
610 | 610 | ||
611 | /* | 611 | /* |
612 | * if the current preferred ag does not have an active allocator | 612 | * if the current preferred ag does not have an active allocator |
613 | * and has at least average freespace, return it | 613 | * and has at least average freespace, return it |
614 | */ | 614 | */ |
615 | agpref = bmp->db_agpref; | 615 | agpref = bmp->db_agpref; |
616 | if ((atomic_read(&bmp->db_active[agpref]) == 0) && | 616 | if ((atomic_read(&bmp->db_active[agpref]) == 0) && |
617 | (bmp->db_agfree[agpref] >= avgfree)) | 617 | (bmp->db_agfree[agpref] >= avgfree)) |
618 | goto unlock; | 618 | goto unlock; |
619 | 619 | ||
620 | /* From the last preferred ag, find the next one with at least | 620 | /* From the last preferred ag, find the next one with at least |
621 | * average free space. | 621 | * average free space. |
622 | */ | 622 | */ |
623 | for (i = 0 ; i < bmp->db_numag; i++, agpref++) { | 623 | for (i = 0 ; i < bmp->db_numag; i++, agpref++) { |
624 | if (agpref == bmp->db_numag) | 624 | if (agpref == bmp->db_numag) |
625 | agpref = 0; | 625 | agpref = 0; |
626 | 626 | ||
627 | if (atomic_read(&bmp->db_active[agpref])) | 627 | if (atomic_read(&bmp->db_active[agpref])) |
628 | /* open file is currently growing in this ag */ | 628 | /* open file is currently growing in this ag */ |
629 | continue; | 629 | continue; |
630 | if (bmp->db_agfree[agpref] >= avgfree) { | 630 | if (bmp->db_agfree[agpref] >= avgfree) { |
631 | /* Return this one */ | 631 | /* Return this one */ |
632 | bmp->db_agpref = agpref; | 632 | bmp->db_agpref = agpref; |
633 | goto unlock; | 633 | goto unlock; |
634 | } else if (bmp->db_agfree[agpref] > hwm) { | 634 | } else if (bmp->db_agfree[agpref] > hwm) { |
635 | /* Less than avg. freespace, but best so far */ | 635 | /* Less than avg. freespace, but best so far */ |
636 | hwm = bmp->db_agfree[agpref]; | 636 | hwm = bmp->db_agfree[agpref]; |
637 | next_best = agpref; | 637 | next_best = agpref; |
638 | } | 638 | } |
639 | } | 639 | } |
640 | 640 | ||
641 | /* | 641 | /* |
642 | * If no inactive ag was found with average freespace, use the | 642 | * If no inactive ag was found with average freespace, use the |
643 | * next best | 643 | * next best |
644 | */ | 644 | */ |
645 | if (next_best != -1) | 645 | if (next_best != -1) |
646 | bmp->db_agpref = next_best; | 646 | bmp->db_agpref = next_best; |
647 | /* else leave db_agpref unchanged */ | 647 | /* else leave db_agpref unchanged */ |
648 | unlock: | 648 | unlock: |
649 | BMAP_UNLOCK(bmp); | 649 | BMAP_UNLOCK(bmp); |
650 | 650 | ||
651 | /* return the preferred group. | 651 | /* return the preferred group. |
652 | */ | 652 | */ |
653 | return (bmp->db_agpref); | 653 | return (bmp->db_agpref); |
654 | } | 654 | } |
655 | 655 | ||
656 | /* | 656 | /* |
657 | * NAME: dbAlloc() | 657 | * NAME: dbAlloc() |
658 | * | 658 | * |
659 | * FUNCTION: attempt to allocate a specified number of contiguous free | 659 | * FUNCTION: attempt to allocate a specified number of contiguous free |
660 | * blocks from the working allocation block map. | 660 | * blocks from the working allocation block map. |
661 | * | 661 | * |
662 | * the block allocation policy uses hints and a multi-step | 662 | * the block allocation policy uses hints and a multi-step |
663 | * approach. | 663 | * approach. |
664 | * | 664 | * |
665 | * for allocation requests smaller than the number of blocks | 665 | * for allocation requests smaller than the number of blocks |
666 | * per dmap, we first try to allocate the new blocks | 666 | * per dmap, we first try to allocate the new blocks |
667 | * immediately following the hint. if these blocks are not | 667 | * immediately following the hint. if these blocks are not |
668 | * available, we try to allocate blocks near the hint. if | 668 | * available, we try to allocate blocks near the hint. if |
669 | * no blocks near the hint are available, we next try to | 669 | * no blocks near the hint are available, we next try to |
670 | * allocate within the same dmap as contains the hint. | 670 | * allocate within the same dmap as contains the hint. |
671 | * | 671 | * |
672 | * if no blocks are available in the dmap or the allocation | 672 | * if no blocks are available in the dmap or the allocation |
673 | * request is larger than the dmap size, we try to allocate | 673 | * request is larger than the dmap size, we try to allocate |
674 | * within the same allocation group as contains the hint. if | 674 | * within the same allocation group as contains the hint. if |
675 | * this does not succeed, we finally try to allocate anywhere | 675 | * this does not succeed, we finally try to allocate anywhere |
676 | * within the aggregate. | 676 | * within the aggregate. |
677 | * | 677 | * |
 * we also try to allocate anywhere within the aggregate
 * for allocation requests larger than the allocation group
680 | * size or requests that specify no hint value. | 680 | * size or requests that specify no hint value. |
681 | * | 681 | * |
682 | * PARAMETERS: | 682 | * PARAMETERS: |
683 | * ip - pointer to in-core inode; | 683 | * ip - pointer to in-core inode; |
684 | * hint - allocation hint. | 684 | * hint - allocation hint. |
685 | * nblocks - number of contiguous blocks in the range. | 685 | * nblocks - number of contiguous blocks in the range. |
686 | * results - on successful return, set to the starting block number | 686 | * results - on successful return, set to the starting block number |
687 | * of the newly allocated contiguous range. | 687 | * of the newly allocated contiguous range. |
688 | * | 688 | * |
689 | * RETURN VALUES: | 689 | * RETURN VALUES: |
690 | * 0 - success | 690 | * 0 - success |
691 | * -ENOSPC - insufficient disk resources | 691 | * -ENOSPC - insufficient disk resources |
692 | * -EIO - i/o error | 692 | * -EIO - i/o error |
693 | */ | 693 | */ |
/*
 * Tiered allocator: for small requests, try (1) exactly after the hint,
 * (2) near the hint, (3) anywhere in the hint's dmap; then (4) the hint's
 * allocation group; finally (5) a preferred AG from dbNextAG() or anywhere
 * in the map.  Steps 1-3 run under the map's read lock, 4-5 under the
 * write lock; every exit path releases whichever lock it holds.
 */
int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
{
	int rc, agno;
	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
	struct bmap *bmp;
	struct metapage *mp;
	s64 lblkno, blkno;
	struct dmap *dp;
	int l2nb;
	s64 mapSize;
	int writers;

	/* assert that nblocks is valid */
	assert(nblocks > 0);

	/* get the log2 number of blocks to be allocated.
	 * if the number of blocks is not a log2 multiple,
	 * it will be rounded up to the next log2 multiple.
	 */
	l2nb = BLKSTOL2(nblocks);

	bmp = JFS_SBI(ip->i_sb)->bmap;

	mapSize = bmp->db_mapsize;

	/* the hint should be within the map */
	if (hint >= mapSize) {
		jfs_error(ip->i_sb, "dbAlloc: the hint is outside the map");
		return -EIO;
	}

	/* if the number of blocks to be allocated is greater than the
	 * allocation group size, try to allocate anywhere.
	 */
	if (l2nb > bmp->db_agl2size) {
		IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP);

		rc = dbAllocAny(bmp, nblocks, l2nb, results);

		goto write_unlock;
	}

	/*
	 * If no hint, let dbNextAG recommend an allocation group
	 */
	if (hint == 0)
		goto pref_ag;

	/* we would like to allocate close to the hint.  adjust the
	 * hint to the block following the hint since the allocators
	 * will start looking for free space starting at this point.
	 */
	blkno = hint + 1;

	if (blkno >= bmp->db_mapsize)
		goto pref_ag;

	agno = blkno >> bmp->db_agl2size;

	/* check if blkno crosses over into a new allocation group.
	 * if so, check if we should allow allocations within this
	 * allocation group.
	 * NOTE(review): the inner if below is the (unbraced) body of
	 * this outer if — easy to misread; intentional as written.
	 */
	if ((blkno & (bmp->db_agsize - 1)) == 0)
		/* check if the AG is currently being written to.
		 * if so, call dbNextAG() to find a non-busy
		 * AG with sufficient free space.
		 */
		if (atomic_read(&bmp->db_active[agno]))
			goto pref_ag;

	/* check if the allocation request size can be satisfied from a
	 * single dmap.  if so, try to allocate from the dmap containing
	 * the hint using a tiered strategy.
	 */
	if (nblocks <= BPERDMAP) {
		IREAD_LOCK(ipbmap, RDWRLOCK_DMAP);

		/* get the buffer for the dmap containing the hint.
		 */
		rc = -EIO;
		lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage);
		mp = read_metapage(ipbmap, lblkno, PSIZE, 0);
		if (mp == NULL)
			goto read_unlock;

		dp = (struct dmap *) mp->data;

		/* first, try to satisfy the allocation request with the
		 * blocks beginning at the hint.
		 */
		if ((rc = dbAllocNext(bmp, dp, blkno, (int) nblocks))
		    != -ENOSPC) {
			if (rc == 0) {
				*results = blkno;
				mark_metapage_dirty(mp);
			}

			release_metapage(mp);
			goto read_unlock;
		}

		/* count writers active in this AG; if anyone but us is
		 * growing a file here, move on to avoid fragmentation.
		 */
		writers = atomic_read(&bmp->db_active[agno]);
		if ((writers > 1) ||
		    ((writers == 1) && (JFS_IP(ip)->active_ag != agno))) {
			/*
			 * Someone else is writing in this allocation
			 * group.  To avoid fragmenting, try another ag
			 */
			release_metapage(mp);
			IREAD_UNLOCK(ipbmap);
			goto pref_ag;
		}

		/* next, try to satisfy the allocation request with blocks
		 * near the hint.
		 */
		if ((rc =
		     dbAllocNear(bmp, dp, blkno, (int) nblocks, l2nb, results))
		    != -ENOSPC) {
			if (rc == 0)
				mark_metapage_dirty(mp);

			release_metapage(mp);
			goto read_unlock;
		}

		/* try to satisfy the allocation request with blocks within
		 * the same dmap as the hint.
		 */
		if ((rc = dbAllocDmapLev(bmp, dp, (int) nblocks, l2nb, results))
		    != -ENOSPC) {
			if (rc == 0)
				mark_metapage_dirty(mp);

			release_metapage(mp);
			goto read_unlock;
		}

		release_metapage(mp);
		IREAD_UNLOCK(ipbmap);
	}

	/* try to satisfy the allocation request with blocks within
	 * the same allocation group as the hint.
	 */
	IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP);
	if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results)) != -ENOSPC)
		goto write_unlock;

	IWRITE_UNLOCK(ipbmap);


      pref_ag:
	/*
	 * Let dbNextAG recommend a preferred allocation group
	 */
	agno = dbNextAG(ipbmap);
	IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP);

	/* Try to allocate within this allocation group.  if that fails, try to
	 * allocate anywhere in the map.
	 */
	if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results)) == -ENOSPC)
		rc = dbAllocAny(bmp, nblocks, l2nb, results);

      write_unlock:
	IWRITE_UNLOCK(ipbmap);

	return (rc);

      read_unlock:
	IREAD_UNLOCK(ipbmap);

	return (rc);
}
870 | 870 | ||
871 | #ifdef _NOTYET | 871 | #ifdef _NOTYET |
872 | /* | 872 | /* |
873 | * NAME: dbAllocExact() | 873 | * NAME: dbAllocExact() |
874 | * | 874 | * |
875 | * FUNCTION: try to allocate the requested extent; | 875 | * FUNCTION: try to allocate the requested extent; |
876 | * | 876 | * |
877 | * PARAMETERS: | 877 | * PARAMETERS: |
878 | * ip - pointer to in-core inode; | 878 | * ip - pointer to in-core inode; |
879 | * blkno - extent address; | 879 | * blkno - extent address; |
880 | * nblocks - extent length; | 880 | * nblocks - extent length; |
881 | * | 881 | * |
882 | * RETURN VALUES: | 882 | * RETURN VALUES: |
883 | * 0 - success | 883 | * 0 - success |
884 | * -ENOSPC - insufficient disk resources | 884 | * -ENOSPC - insufficient disk resources |
885 | * -EIO - i/o error | 885 | * -EIO - i/o error |
886 | */ | 886 | */ |
887 | int dbAllocExact(struct inode *ip, s64 blkno, int nblocks) | 887 | int dbAllocExact(struct inode *ip, s64 blkno, int nblocks) |
888 | { | 888 | { |
889 | int rc; | 889 | int rc; |
890 | struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; | 890 | struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; |
891 | struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap; | 891 | struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap; |
892 | struct dmap *dp; | 892 | struct dmap *dp; |
893 | s64 lblkno; | 893 | s64 lblkno; |
894 | struct metapage *mp; | 894 | struct metapage *mp; |
895 | 895 | ||
896 | IREAD_LOCK(ipbmap, RDWRLOCK_DMAP); | 896 | IREAD_LOCK(ipbmap, RDWRLOCK_DMAP); |
897 | 897 | ||
898 | /* | 898 | /* |
899 | * validate extent request: | 899 | * validate extent request: |
900 | * | 900 | * |
901 | * note: defragfs policy: | 901 | * note: defragfs policy: |
902 | * max 64 blocks will be moved. | 902 | * max 64 blocks will be moved. |
903 | * allocation request size must be satisfied from a single dmap. | 903 | * allocation request size must be satisfied from a single dmap. |
904 | */ | 904 | */ |
905 | if (nblocks <= 0 || nblocks > BPERDMAP || blkno >= bmp->db_mapsize) { | 905 | if (nblocks <= 0 || nblocks > BPERDMAP || blkno >= bmp->db_mapsize) { |
906 | IREAD_UNLOCK(ipbmap); | 906 | IREAD_UNLOCK(ipbmap); |
907 | return -EINVAL; | 907 | return -EINVAL; |
908 | } | 908 | } |
909 | 909 | ||
910 | if (nblocks > ((s64) 1 << bmp->db_maxfreebud)) { | 910 | if (nblocks > ((s64) 1 << bmp->db_maxfreebud)) { |
911 | /* the free space is no longer available */ | 911 | /* the free space is no longer available */ |
912 | IREAD_UNLOCK(ipbmap); | 912 | IREAD_UNLOCK(ipbmap); |
913 | return -ENOSPC; | 913 | return -ENOSPC; |
914 | } | 914 | } |
915 | 915 | ||
916 | /* read in the dmap covering the extent */ | 916 | /* read in the dmap covering the extent */ |
917 | lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage); | 917 | lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage); |
918 | mp = read_metapage(ipbmap, lblkno, PSIZE, 0); | 918 | mp = read_metapage(ipbmap, lblkno, PSIZE, 0); |
919 | if (mp == NULL) { | 919 | if (mp == NULL) { |
920 | IREAD_UNLOCK(ipbmap); | 920 | IREAD_UNLOCK(ipbmap); |
921 | return -EIO; | 921 | return -EIO; |
922 | } | 922 | } |
923 | dp = (struct dmap *) mp->data; | 923 | dp = (struct dmap *) mp->data; |
924 | 924 | ||
925 | /* try to allocate the requested extent */ | 925 | /* try to allocate the requested extent */ |
926 | rc = dbAllocNext(bmp, dp, blkno, nblocks); | 926 | rc = dbAllocNext(bmp, dp, blkno, nblocks); |
927 | 927 | ||
928 | IREAD_UNLOCK(ipbmap); | 928 | IREAD_UNLOCK(ipbmap); |
929 | 929 | ||
930 | if (rc == 0) | 930 | if (rc == 0) |
931 | mark_metapage_dirty(mp); | 931 | mark_metapage_dirty(mp); |
932 | 932 | ||
933 | release_metapage(mp); | 933 | release_metapage(mp); |
934 | 934 | ||
935 | return (rc); | 935 | return (rc); |
936 | } | 936 | } |
937 | #endif /* _NOTYET */ | 937 | #endif /* _NOTYET */ |
938 | 938 | ||
939 | /* | 939 | /* |
940 | * NAME: dbReAlloc() | 940 | * NAME: dbReAlloc() |
941 | * | 941 | * |
942 | * FUNCTION: attempt to extend a current allocation by a specified | 942 | * FUNCTION: attempt to extend a current allocation by a specified |
943 | * number of blocks. | 943 | * number of blocks. |
944 | * | 944 | * |
945 | * this routine attempts to satisfy the allocation request | 945 | * this routine attempts to satisfy the allocation request |
946 | * by first trying to extend the existing allocation in | 946 | * by first trying to extend the existing allocation in |
947 | * place by allocating the additional blocks as the blocks | 947 | * place by allocating the additional blocks as the blocks |
948 | * immediately following the current allocation. if these | 948 | * immediately following the current allocation. if these |
949 | * blocks are not available, this routine will attempt to | 949 | * blocks are not available, this routine will attempt to |
950 | * allocate a new set of contiguous blocks large enough | 950 | * allocate a new set of contiguous blocks large enough |
951 | * to cover the existing allocation plus the additional | 951 | * to cover the existing allocation plus the additional |
952 | * number of blocks required. | 952 | * number of blocks required. |
953 | * | 953 | * |
954 | * PARAMETERS: | 954 | * PARAMETERS: |
955 | * ip - pointer to in-core inode requiring allocation. | 955 | * ip - pointer to in-core inode requiring allocation. |
956 | * blkno - starting block of the current allocation. | 956 | * blkno - starting block of the current allocation. |
957 | * nblocks - number of contiguous blocks within the current | 957 | * nblocks - number of contiguous blocks within the current |
958 | * allocation. | 958 | * allocation. |
959 | * addnblocks - number of blocks to add to the allocation. | 959 | * addnblocks - number of blocks to add to the allocation. |
960 | * results - on successful return, set to the starting block number | 960 | * results - on successful return, set to the starting block number |
961 | * of the existing allocation if the existing allocation | 961 | * of the existing allocation if the existing allocation |
962 | * was extended in place or to a newly allocated contiguous | 962 | * was extended in place or to a newly allocated contiguous |
963 | * range if the existing allocation could not be extended | 963 | * range if the existing allocation could not be extended |
964 | * in place. | 964 | * in place. |
965 | * | 965 | * |
966 | * RETURN VALUES: | 966 | * RETURN VALUES: |
967 | * 0 - success | 967 | * 0 - success |
968 | * -ENOSPC - insufficient disk resources | 968 | * -ENOSPC - insufficient disk resources |
969 | * -EIO - i/o error | 969 | * -EIO - i/o error |
970 | */ | 970 | */ |
971 | int | 971 | int |
972 | dbReAlloc(struct inode *ip, | 972 | dbReAlloc(struct inode *ip, |
973 | s64 blkno, s64 nblocks, s64 addnblocks, s64 * results) | 973 | s64 blkno, s64 nblocks, s64 addnblocks, s64 * results) |
974 | { | 974 | { |
975 | int rc; | 975 | int rc; |
976 | 976 | ||
977 | /* try to extend the allocation in place. | 977 | /* try to extend the allocation in place. |
978 | */ | 978 | */ |
979 | if ((rc = dbExtend(ip, blkno, nblocks, addnblocks)) == 0) { | 979 | if ((rc = dbExtend(ip, blkno, nblocks, addnblocks)) == 0) { |
980 | *results = blkno; | 980 | *results = blkno; |
981 | return (0); | 981 | return (0); |
982 | } else { | 982 | } else { |
983 | if (rc != -ENOSPC) | 983 | if (rc != -ENOSPC) |
984 | return (rc); | 984 | return (rc); |
985 | } | 985 | } |
986 | 986 | ||
987 | /* could not extend the allocation in place, so allocate a | 987 | /* could not extend the allocation in place, so allocate a |
988 | * new set of blocks for the entire request (i.e. try to get | 988 | * new set of blocks for the entire request (i.e. try to get |
989 | * a range of contiguous blocks large enough to cover the | 989 | * a range of contiguous blocks large enough to cover the |
990 | * existing allocation plus the additional blocks.) | 990 | * existing allocation plus the additional blocks.) |
991 | */ | 991 | */ |
992 | return (dbAlloc | 992 | return (dbAlloc |
993 | (ip, blkno + nblocks - 1, addnblocks + nblocks, results)); | 993 | (ip, blkno + nblocks - 1, addnblocks + nblocks, results)); |
994 | } | 994 | } |
995 | 995 | ||
996 | 996 | ||
/*
 * NAME:	dbExtend()
 *
 * FUNCTION:	attempt to extend a current allocation by a specified
 *		number of blocks.
 *
 *		this routine attempts to satisfy the allocation request
 *		by first trying to extend the existing allocation in
 *		place by allocating the additional blocks as the blocks
 *		immediately following the current allocation.
 *
 * PARAMETERS:
 *	ip	    -  pointer to in-core inode requiring allocation.
 *	blkno	    -  starting block of the current allocation.
 *	nblocks	    -  number of contiguous blocks within the current
 *		       allocation.
 *	addnblocks  -  number of blocks to add to the allocation.
 *
 * RETURN VALUES:
 *	0	- success
 *	-ENOSPC	- insufficient disk resources
 *	-EIO	- i/o error
 */
static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks)
{
	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
	s64 lblkno, lastblkno, extblkno;
	uint rel_block;
	struct metapage *mp;
	struct dmap *dp;
	int rc;
	struct inode *ipbmap = sbi->ipbmap;
	struct bmap *bmp;

	/*
	 * We don't want a non-aligned extent to cross a page boundary
	 */
	if (((rel_block = blkno & (sbi->nbperpage - 1))) &&
	    (rel_block + nblocks + addnblocks > sbi->nbperpage))
		return -ENOSPC;

	/* get the last block of the current allocation */
	lastblkno = blkno + nblocks - 1;

	/* determine the block number of the block following
	 * the existing allocation.
	 */
	extblkno = lastblkno + 1;

	/* NOTE(review): only the read lock is taken here; per-dmap
	 * serialization presumably comes from the metapage itself —
	 * confirm against dbAllocNext()'s locking contract.
	 */
	IREAD_LOCK(ipbmap, RDWRLOCK_DMAP);

	/* better be within the file system */
	bmp = sbi->bmap;
	if (lastblkno < 0 || lastblkno >= bmp->db_mapsize) {
		IREAD_UNLOCK(ipbmap);
		jfs_error(ip->i_sb,
			  "dbExtend: the block is outside the filesystem");
		return -EIO;
	}

	/* we'll attempt to extend the current allocation in place by
	 * allocating the additional blocks as the blocks immediately
	 * following the current allocation.  we only try to extend the
	 * current allocation in place if the number of additional blocks
	 * can fit into a dmap, the last block of the current allocation
	 * is not the last block of the file system, and the start of the
	 * inplace extension is not on an allocation group boundary.
	 */
	if (addnblocks > BPERDMAP || extblkno >= bmp->db_mapsize ||
	    (extblkno & (bmp->db_agsize - 1)) == 0) {
		IREAD_UNLOCK(ipbmap);
		return -ENOSPC;
	}

	/* get the buffer for the dmap containing the first block
	 * of the extension.
	 */
	lblkno = BLKTODMAP(extblkno, bmp->db_l2nbperpage);
	mp = read_metapage(ipbmap, lblkno, PSIZE, 0);
	if (mp == NULL) {
		IREAD_UNLOCK(ipbmap);
		return -EIO;
	}

	dp = (struct dmap *) mp->data;

	/* try to allocate the blocks immediately following the
	 * current allocation.
	 */
	rc = dbAllocNext(bmp, dp, extblkno, (int) addnblocks);

	IREAD_UNLOCK(ipbmap);

	/* were we successful ? */
	if (rc == 0)
		/* success: write the modified dmap page out (this also
		 * releases the metapage reference).
		 */
		write_metapage(mp);
	else
		/* we were not successful */
		release_metapage(mp);


	return (rc);
}
1100 | 1100 | ||
1101 | 1101 | ||
/*
 * NAME:	dbAllocNext()
 *
 * FUNCTION:	attempt to allocate the blocks of the specified block
 *		range within a dmap.
 *
 * PARAMETERS:
 *	bmp	-  pointer to bmap descriptor
 *	dp	-  pointer to dmap.
 *	blkno	-  starting block number of the range.
 *	nblocks	-  number of contiguous free blocks of the range.
 *
 * RETURN VALUES:
 *	0	- success
 *	-ENOSPC	- insufficient disk resources
 *	-EIO	- i/o error
 *
 * serialization: IREAD_LOCK(ipbmap) held on entry/exit;
 */
static int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno,
		       int nblocks)
{
	int dbitno, word, rembits, nb, nwords, wbitno, nw;
	int l2size;
	s8 *leaf;
	u32 mask;

	/* sanity: the on-disk dmap page must carry a valid leaf index. */
	if (dp->tree.leafidx != cpu_to_le32(LEAFIND)) {
		jfs_error(bmp->db_ipbmap->i_sb,
			  "dbAllocNext: Corrupt dmap page");
		return -EIO;
	}

	/* pick up a pointer to the leaves of the dmap tree.
	 */
	leaf = dp->tree.stree + le32_to_cpu(dp->tree.leafidx);

	/* determine the bit number and word within the dmap of the
	 * starting block.
	 */
	dbitno = blkno & (BPERDMAP - 1);
	word = dbitno >> L2DBWORD;

	/* check if the specified block range is contained within
	 * this dmap.
	 */
	if (dbitno + nblocks > BPERDMAP)
		return -ENOSPC;

	/* check if the starting leaf indicates that anything
	 * is free.
	 */
	if (leaf[word] == NOFREE)
		return -ENOSPC;

	/* check the dmaps words corresponding to block range to see
	 * if the block range is free.  not all bits of the first and
	 * last words may be contained within the block range.  if this
	 * is the case, we'll work against those words (i.e. partial first
	 * and/or last) on an individual basis (a single pass) and examine
	 * the actual bits to determine if they are free.  a single pass
	 * will be used for all dmap words fully contained within the
	 * specified range.  within this pass, the leaves of the dmap
	 * tree will be examined to determine if the blocks are free.  a
	 * single leaf may describe the free space of multiple dmap
	 * words, so we may visit only a subset of the actual leaves
	 * corresponding to the dmap words of the block range.
	 */
	for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) {
		/* determine the bit number within the word and
		 * the number of bits within the word.
		 */
		wbitno = dbitno & (DBWORD - 1);
		nb = min(rembits, DBWORD - wbitno);

		/* check if only part of the word is to be examined.
		 */
		if (nb < DBWORD) {
			/* check if the bits are free: build a mask of the
			 * nb bits starting at wbitno; every one of them
			 * must be clear (free) in the word map.
			 */
			mask = (ONES << (DBWORD - nb) >> wbitno);
			if ((mask & ~le32_to_cpu(dp->wmap[word])) != mask)
				return -ENOSPC;

			word += 1;
		} else {
			/* one or more dmap words are fully contained
			 * within the block range.  determine how many
			 * words and how many bits.
			 * note: nb is re-derived here so the loop update
			 * advances by whole words, not the min() above.
			 */
			nwords = rembits >> L2DBWORD;
			nb = nwords << L2DBWORD;

			/* now examine the appropriate leaves to determine
			 * if the blocks are free.
			 */
			while (nwords > 0) {
				/* does the leaf describe any free space ?
				 */
				if (leaf[word] < BUDMIN)
					return -ENOSPC;

				/* determine the l2 number of bits provided
				 * by this leaf.
				 */
				l2size =
				    min((int)leaf[word], NLSTOL2BSZ(nwords));

				/* determine how many words were handled.
				 */
				nw = BUDSIZE(l2size, BUDMIN);

				nwords -= nw;
				word += nw;
			}
		}
	}

	/* allocate the blocks.
	 */
	return (dbAllocDmap(bmp, dp, blkno, nblocks));
}
1224 | 1224 | ||
1225 | 1225 | ||
1226 | /* | 1226 | /* |
1227 | * NAME: dbAllocNear() | 1227 | * NAME: dbAllocNear() |
1228 | * | 1228 | * |
1229 | * FUNCTION: attempt to allocate a number of contiguous free blocks near | 1229 | * FUNCTION: attempt to allocate a number of contiguous free blocks near |
1230 | * a specified block (hint) within a dmap. | 1230 | * a specified block (hint) within a dmap. |
1231 | * | 1231 | * |
1232 | * starting with the dmap leaf that covers the hint, we'll | 1232 | * starting with the dmap leaf that covers the hint, we'll |
1233 | * check the next four contiguous leaves for sufficient free | 1233 | * check the next four contiguous leaves for sufficient free |
1234 | * space. if sufficient free space is found, we'll allocate | 1234 | * space. if sufficient free space is found, we'll allocate |
1235 | * the desired free space. | 1235 | * the desired free space. |
1236 | * | 1236 | * |
1237 | * PARAMETERS: | 1237 | * PARAMETERS: |
1238 | * bmp - pointer to bmap descriptor | 1238 | * bmp - pointer to bmap descriptor |
1239 | * dp - pointer to dmap. | 1239 | * dp - pointer to dmap. |
1240 | * blkno - block number to allocate near. | 1240 | * blkno - block number to allocate near. |
1241 | * nblocks - actual number of contiguous free blocks desired. | 1241 | * nblocks - actual number of contiguous free blocks desired. |
1242 | * l2nb - log2 number of contiguous free blocks desired. | 1242 | * l2nb - log2 number of contiguous free blocks desired. |
1243 | * results - on successful return, set to the starting block number | 1243 | * results - on successful return, set to the starting block number |
1244 | * of the newly allocated range. | 1244 | * of the newly allocated range. |
1245 | * | 1245 | * |
1246 | * RETURN VALUES: | 1246 | * RETURN VALUES: |
1247 | * 0 - success | 1247 | * 0 - success |
1248 | * -ENOSPC - insufficient disk resources | 1248 | * -ENOSPC - insufficient disk resources |
1249 | * -EIO - i/o error | 1249 | * -EIO - i/o error |
1250 | * | 1250 | * |
1251 | * serialization: IREAD_LOCK(ipbmap) held on entry/exit; | 1251 | * serialization: IREAD_LOCK(ipbmap) held on entry/exit; |
1252 | */ | 1252 | */ |
1253 | static int | 1253 | static int |
1254 | dbAllocNear(struct bmap * bmp, | 1254 | dbAllocNear(struct bmap * bmp, |
1255 | struct dmap * dp, s64 blkno, int nblocks, int l2nb, s64 * results) | 1255 | struct dmap * dp, s64 blkno, int nblocks, int l2nb, s64 * results) |
1256 | { | 1256 | { |
1257 | int word, lword, rc; | 1257 | int word, lword, rc; |
1258 | s8 *leaf; | 1258 | s8 *leaf; |
1259 | 1259 | ||
1260 | if (dp->tree.leafidx != cpu_to_le32(LEAFIND)) { | 1260 | if (dp->tree.leafidx != cpu_to_le32(LEAFIND)) { |
1261 | jfs_error(bmp->db_ipbmap->i_sb, | 1261 | jfs_error(bmp->db_ipbmap->i_sb, |
1262 | "dbAllocNear: Corrupt dmap page"); | 1262 | "dbAllocNear: Corrupt dmap page"); |
1263 | return -EIO; | 1263 | return -EIO; |
1264 | } | 1264 | } |
1265 | 1265 | ||
1266 | leaf = dp->tree.stree + le32_to_cpu(dp->tree.leafidx); | 1266 | leaf = dp->tree.stree + le32_to_cpu(dp->tree.leafidx); |
1267 | 1267 | ||
1268 | /* determine the word within the dmap that holds the hint | 1268 | /* determine the word within the dmap that holds the hint |
1269 | * (i.e. blkno). also, determine the last word in the dmap | 1269 | * (i.e. blkno). also, determine the last word in the dmap |
1270 | * that we'll include in our examination. | 1270 | * that we'll include in our examination. |
1271 | */ | 1271 | */ |
1272 | word = (blkno & (BPERDMAP - 1)) >> L2DBWORD; | 1272 | word = (blkno & (BPERDMAP - 1)) >> L2DBWORD; |
1273 | lword = min(word + 4, LPERDMAP); | 1273 | lword = min(word + 4, LPERDMAP); |
1274 | 1274 | ||
1275 | /* examine the leaves for sufficient free space. | 1275 | /* examine the leaves for sufficient free space. |
1276 | */ | 1276 | */ |
1277 | for (; word < lword; word++) { | 1277 | for (; word < lword; word++) { |
1278 | /* does the leaf describe sufficient free space ? | 1278 | /* does the leaf describe sufficient free space ? |
1279 | */ | 1279 | */ |
1280 | if (leaf[word] < l2nb) | 1280 | if (leaf[word] < l2nb) |
1281 | continue; | 1281 | continue; |
1282 | 1282 | ||
1283 | /* determine the block number within the file system | 1283 | /* determine the block number within the file system |
1284 | * of the first block described by this dmap word. | 1284 | * of the first block described by this dmap word. |
1285 | */ | 1285 | */ |
1286 | blkno = le64_to_cpu(dp->start) + (word << L2DBWORD); | 1286 | blkno = le64_to_cpu(dp->start) + (word << L2DBWORD); |
1287 | 1287 | ||
1288 | /* if not all bits of the dmap word are free, get the | 1288 | /* if not all bits of the dmap word are free, get the |
1289 | * starting bit number within the dmap word of the required | 1289 | * starting bit number within the dmap word of the required |
1290 | * string of free bits and adjust the block number with the | 1290 | * string of free bits and adjust the block number with the |
1291 | * value. | 1291 | * value. |
1292 | */ | 1292 | */ |
1293 | if (leaf[word] < BUDMIN) | 1293 | if (leaf[word] < BUDMIN) |
1294 | blkno += | 1294 | blkno += |
1295 | dbFindBits(le32_to_cpu(dp->wmap[word]), l2nb); | 1295 | dbFindBits(le32_to_cpu(dp->wmap[word]), l2nb); |
1296 | 1296 | ||
1297 | /* allocate the blocks. | 1297 | /* allocate the blocks. |
1298 | */ | 1298 | */ |
1299 | if ((rc = dbAllocDmap(bmp, dp, blkno, nblocks)) == 0) | 1299 | if ((rc = dbAllocDmap(bmp, dp, blkno, nblocks)) == 0) |
1300 | *results = blkno; | 1300 | *results = blkno; |
1301 | 1301 | ||
1302 | return (rc); | 1302 | return (rc); |
1303 | } | 1303 | } |
1304 | 1304 | ||
1305 | return -ENOSPC; | 1305 | return -ENOSPC; |
1306 | } | 1306 | } |
1307 | 1307 | ||
1308 | 1308 | ||
1309 | /* | 1309 | /* |
1310 | * NAME: dbAllocAG() | 1310 | * NAME: dbAllocAG() |
1311 | * | 1311 | * |
1312 | * FUNCTION: attempt to allocate the specified number of contiguous | 1312 | * FUNCTION: attempt to allocate the specified number of contiguous |
1313 | * free blocks within the specified allocation group. | 1313 | * free blocks within the specified allocation group. |
1314 | * | 1314 | * |
1315 | * unless the allocation group size is equal to the number | 1315 | * unless the allocation group size is equal to the number |
1316 | * of blocks per dmap, the dmap control pages will be used to | 1316 | * of blocks per dmap, the dmap control pages will be used to |
1317 | * find the required free space, if available. we start the | 1317 | * find the required free space, if available. we start the |
1318 | * search at the highest dmap control page level which | 1318 | * search at the highest dmap control page level which |
1319 | * distinctly describes the allocation group's free space | 1319 | * distinctly describes the allocation group's free space |
1320 | * (i.e. the highest level at which the allocation group's | 1320 | * (i.e. the highest level at which the allocation group's |
1321 | * free space is not mixed in with that of any other group). | 1321 | * free space is not mixed in with that of any other group). |
1322 | * in addition, we start the search within this level at a | 1322 | * in addition, we start the search within this level at a |
1323 | * height of the dmapctl dmtree at which the nodes distinctly | 1323 | * height of the dmapctl dmtree at which the nodes distinctly |
1324 | * describe the allocation group's free space. at this height, | 1324 | * describe the allocation group's free space. at this height, |
1325 | * the allocation group's free space may be represented by 1 | 1325 | * the allocation group's free space may be represented by 1 |
1326 | * or two sub-trees, depending on the allocation group size. | 1326 | * or two sub-trees, depending on the allocation group size. |
1327 | * we search the top nodes of these subtrees left to right for | 1327 | * we search the top nodes of these subtrees left to right for |
1328 | * sufficient free space. if sufficient free space is found, | 1328 | * sufficient free space. if sufficient free space is found, |
1329 | * the subtree is searched to find the leftmost leaf that | 1329 | * the subtree is searched to find the leftmost leaf that |
1330 | * has free space. once we have made it to the leaf, we | 1330 | * has free space. once we have made it to the leaf, we |
1331 | * move the search to the next lower level dmap control page | 1331 | * move the search to the next lower level dmap control page |
1332 | * corresponding to this leaf. we continue down the dmap control | 1332 | * corresponding to this leaf. we continue down the dmap control |
1333 | * pages until we find the dmap that contains or starts the | 1333 | * pages until we find the dmap that contains or starts the |
1334 | * sufficient free space and we allocate at this dmap. | 1334 | * sufficient free space and we allocate at this dmap. |
1335 | * | 1335 | * |
1336 | * if the allocation group size is equal to the dmap size, | 1336 | * if the allocation group size is equal to the dmap size, |
1337 | * we'll start at the dmap corresponding to the allocation | 1337 | * we'll start at the dmap corresponding to the allocation |
1338 | * group and attempt the allocation at this level. | 1338 | * group and attempt the allocation at this level. |
1339 | * | 1339 | * |
1340 | * the dmap control page search is also not performed if the | 1340 | * the dmap control page search is also not performed if the |
1341 | * allocation group is completely free and we go to the first | 1341 | * allocation group is completely free and we go to the first |
1342 | * dmap of the allocation group to do the allocation. this is | 1342 | * dmap of the allocation group to do the allocation. this is |
1343 | * done because the allocation group may be part (not the first | 1343 | * done because the allocation group may be part (not the first |
1344 | * part) of a larger binary buddy system, causing the dmap | 1344 | * part) of a larger binary buddy system, causing the dmap |
1345 | * control pages to indicate no free space (NOFREE) within | 1345 | * control pages to indicate no free space (NOFREE) within |
1346 | * the allocation group. | 1346 | * the allocation group. |
1347 | * | 1347 | * |
1348 | * PARAMETERS: | 1348 | * PARAMETERS: |
1349 | * bmp - pointer to bmap descriptor | 1349 | * bmp - pointer to bmap descriptor |
1350 | * agno - allocation group number. | 1350 | * agno - allocation group number. |
1351 | * nblocks - actual number of contiguous free blocks desired. | 1351 | * nblocks - actual number of contiguous free blocks desired. |
1352 | * l2nb - log2 number of contiguous free blocks desired. | 1352 | * l2nb - log2 number of contiguous free blocks desired. |
1353 | * results - on successful return, set to the starting block number | 1353 | * results - on successful return, set to the starting block number |
1354 | * of the newly allocated range. | 1354 | * of the newly allocated range. |
1355 | * | 1355 | * |
1356 | * RETURN VALUES: | 1356 | * RETURN VALUES: |
1357 | * 0 - success | 1357 | * 0 - success |
1358 | * -ENOSPC - insufficient disk resources | 1358 | * -ENOSPC - insufficient disk resources |
1359 | * -EIO - i/o error | 1359 | * -EIO - i/o error |
1360 | * | 1360 | * |
1361 | * note: IWRITE_LOCK(ipmap) held on entry/exit; | 1361 | * note: IWRITE_LOCK(ipmap) held on entry/exit; |
1362 | */ | 1362 | */ |
static int
dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results)
{
	struct metapage *mp;
	struct dmapctl *dcp;
	int rc, ti, i, k, m, n, agperlev;
	s64 blkno, lblkno;
	int budmin;

	/* allocation request should not be for more than the
	 * allocation group size.
	 */
	if (l2nb > bmp->db_agl2size) {
		jfs_error(bmp->db_ipbmap->i_sb,
			  "dbAllocAG: allocation request is larger than the "
			  "allocation group size");
		return -EIO;
	}

	/* determine the starting block number of the allocation
	 * group.
	 */
	blkno = (s64) agno << bmp->db_agl2size;

	/* check if the allocation group size is the minimum allocation
	 * group size or if the allocation group is completely free. if
	 * the allocation group size is the minimum size of BPERDMAP (i.e.
	 * 1 dmap), there is no need to search the dmap control page (below)
	 * that fully describes the allocation group since the allocation
	 * group is already fully described by a dmap.  in this case, we
	 * just call dbAllocCtl() to search the dmap tree and allocate the
	 * required space if available.
	 *
	 * if the allocation group is completely free, dbAllocCtl() is
	 * also called to allocate the required space.  this is done for
	 * two reasons.  first, it makes no sense searching the dmap control
	 * pages for free space when we know that free space exists.  second,
	 * the dmap control pages may indicate that the allocation group
	 * has no free space if the allocation group is part (not the first
	 * part) of a larger binary buddy system.
	 */
	if (bmp->db_agsize == BPERDMAP
	    || bmp->db_agfree[agno] == bmp->db_agsize) {
		rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results);
		if ((rc == -ENOSPC) &&
		    (bmp->db_agfree[agno] == bmp->db_agsize)) {
			/* a completely free AG must be able to satisfy any
			 * request no larger than the AG itself; -ENOSPC here
			 * means the free counts and the dmaps disagree.
			 */
			printk(KERN_ERR "blkno = %Lx, blocks = %Lx\n",
			       (unsigned long long) blkno,
			       (unsigned long long) nblocks);
			jfs_error(bmp->db_ipbmap->i_sb,
				  "dbAllocAG: dbAllocCtl failed in free AG");
		}
		return (rc);
	}

	/* the buffer for the dmap control page that fully describes the
	 * allocation group.
	 */
	lblkno = BLKTOCTL(blkno, bmp->db_l2nbperpage, bmp->db_aglevel);
	mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
	if (mp == NULL)
		return -EIO;
	dcp = (struct dmapctl *) mp->data;
	/* capture budmin now: the metapage is released below, before
	 * budmin is used to compute the leaf's block number.
	 */
	budmin = dcp->budmin;

	if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) {
		jfs_error(bmp->db_ipbmap->i_sb,
			  "dbAllocAG: Corrupt dmapctl page");
		release_metapage(mp);
		return -EIO;
	}

	/* search the subtree(s) of the dmap control page that describes
	 * the allocation group, looking for sufficient free space. to begin,
	 * determine how many allocation groups are represented in a dmap
	 * control page at the control page level (i.e. L0, L1, L2) that
	 * fully describes an allocation group. next, determine the starting
	 * tree index of this allocation group within the control page.
	 */
	agperlev =
	    (1 << (L2LPERCTL - (bmp->db_agheigth << 1))) / bmp->db_agwidth;
	ti = bmp->db_agstart + bmp->db_agwidth * (agno & (agperlev - 1));

	/* dmap control page trees fan-out by 4 and a single allocation
	 * group may be described by 1 or 2 subtrees within the ag level
	 * dmap control page, depending upon the ag size. examine the ag's
	 * subtrees for sufficient free space, starting with the leftmost
	 * subtree.
	 */
	for (i = 0; i < bmp->db_agwidth; i++, ti++) {
		/* is there sufficient free space ?
		 */
		if (l2nb > dcp->stree[ti])
			continue;

		/* sufficient free space found in a subtree. now search down
		 * the subtree to find the leftmost leaf that describes this
		 * free space.
		 */
		for (k = bmp->db_agheigth; k > 0; k--) {
			/* fan-out of 4: children of node ti start at
			 * (ti << 2) + 1; take the leftmost child that
			 * still advertises enough space.
			 */
			for (n = 0, m = (ti << 2) + 1; n < 4; n++) {
				if (l2nb <= dcp->stree[m + n]) {
					ti = m + n;
					break;
				}
			}
			if (n == 4) {
				/* no child claims the space its parent
				 * advertised - the stree is corrupt.
				 */
				jfs_error(bmp->db_ipbmap->i_sb,
					  "dbAllocAG: failed descending stree");
				release_metapage(mp);
				return -EIO;
			}
		}

		/* determine the block number within the file system
		 * that corresponds to this leaf.
		 */
		if (bmp->db_aglevel == 2)
			blkno = 0;
		else if (bmp->db_aglevel == 1)
			blkno &= ~(MAXL1SIZE - 1);
		else		/* bmp->db_aglevel == 0 */
			blkno &= ~(MAXL0SIZE - 1);

		blkno +=
		    ((s64) (ti - le32_to_cpu(dcp->leafidx))) << budmin;

		/* release the buffer in preparation for going down
		 * the next level of dmap control pages.
		 */
		release_metapage(mp);

		/* check if we need to continue to search down the lower
		 * level dmap control pages.  we need to if the number of
		 * blocks required is less than maximum number of blocks
		 * described at the next lower level.
		 */
		if (l2nb < budmin) {

			/* search the lower level dmap control pages to get
			 * the starting block number of the dmap that
			 * contains or starts off the free space.
			 */
			if ((rc =
			     dbFindCtl(bmp, l2nb, bmp->db_aglevel - 1,
				       &blkno))) {
				if (rc == -ENOSPC) {
					/* this level said there was space,
					 * so a lower-level miss means the
					 * control pages are inconsistent.
					 */
					jfs_error(bmp->db_ipbmap->i_sb,
						  "dbAllocAG: control page "
						  "inconsistent");
					return -EIO;
				}
				return (rc);
			}
		}

		/* allocate the blocks.
		 */
		rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results);
		if (rc == -ENOSPC) {
			jfs_error(bmp->db_ipbmap->i_sb,
				  "dbAllocAG: unable to allocate blocks");
			rc = -EIO;
		}
		return (rc);
	}

	/* no space in the allocation group.  release the buffer and
	 * return -ENOSPC.
	 */
	release_metapage(mp);

	return -ENOSPC;
}
1537 | 1537 | ||
1538 | 1538 | ||
1539 | /* | 1539 | /* |
1540 | * NAME: dbAllocAny() | 1540 | * NAME: dbAllocAny() |
1541 | * | 1541 | * |
1542 | * FUNCTION: attempt to allocate the specified number of contiguous | 1542 | * FUNCTION: attempt to allocate the specified number of contiguous |
1543 | * free blocks anywhere in the file system. | 1543 | * free blocks anywhere in the file system. |
1544 | * | 1544 | * |
1545 | * dbAllocAny() attempts to find the sufficient free space by | 1545 | * dbAllocAny() attempts to find the sufficient free space by |
1546 | * searching down the dmap control pages, starting with the | 1546 | * searching down the dmap control pages, starting with the |
1547 | * highest level (i.e. L0, L1, L2) control page. if free space | 1547 | * highest level (i.e. L0, L1, L2) control page. if free space |
1548 | * large enough to satisfy the desired free space is found, the | 1548 | * large enough to satisfy the desired free space is found, the |
1549 | * desired free space is allocated. | 1549 | * desired free space is allocated. |
1550 | * | 1550 | * |
1551 | * PARAMETERS: | 1551 | * PARAMETERS: |
1552 | * bmp - pointer to bmap descriptor | 1552 | * bmp - pointer to bmap descriptor |
1553 | * nblocks - actual number of contiguous free blocks desired. | 1553 | * nblocks - actual number of contiguous free blocks desired. |
1554 | * l2nb - log2 number of contiguous free blocks desired. | 1554 | * l2nb - log2 number of contiguous free blocks desired. |
1555 | * results - on successful return, set to the starting block number | 1555 | * results - on successful return, set to the starting block number |
1556 | * of the newly allocated range. | 1556 | * of the newly allocated range. |
1557 | * | 1557 | * |
1558 | * RETURN VALUES: | 1558 | * RETURN VALUES: |
1559 | * 0 - success | 1559 | * 0 - success |
1560 | * -ENOSPC - insufficient disk resources | 1560 | * -ENOSPC - insufficient disk resources |
1561 | * -EIO - i/o error | 1561 | * -EIO - i/o error |
1562 | * | 1562 | * |
1563 | * serialization: IWRITE_LOCK(ipbmap) held on entry/exit; | 1563 | * serialization: IWRITE_LOCK(ipbmap) held on entry/exit; |
1564 | */ | 1564 | */ |
1565 | static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results) | 1565 | static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results) |
1566 | { | 1566 | { |
1567 | int rc; | 1567 | int rc; |
1568 | s64 blkno = 0; | 1568 | s64 blkno = 0; |
1569 | 1569 | ||
1570 | /* starting with the top level dmap control page, search | 1570 | /* starting with the top level dmap control page, search |
1571 | * down the dmap control levels for sufficient free space. | 1571 | * down the dmap control levels for sufficient free space. |
1572 | * if free space is found, dbFindCtl() returns the starting | 1572 | * if free space is found, dbFindCtl() returns the starting |
1573 | * block number of the dmap that contains or starts off the | 1573 | * block number of the dmap that contains or starts off the |
1574 | * range of free space. | 1574 | * range of free space. |
1575 | */ | 1575 | */ |
1576 | if ((rc = dbFindCtl(bmp, l2nb, bmp->db_maxlevel, &blkno))) | 1576 | if ((rc = dbFindCtl(bmp, l2nb, bmp->db_maxlevel, &blkno))) |
1577 | return (rc); | 1577 | return (rc); |
1578 | 1578 | ||
1579 | /* allocate the blocks. | 1579 | /* allocate the blocks. |
1580 | */ | 1580 | */ |
1581 | rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results); | 1581 | rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results); |
1582 | if (rc == -ENOSPC) { | 1582 | if (rc == -ENOSPC) { |
1583 | jfs_error(bmp->db_ipbmap->i_sb, | 1583 | jfs_error(bmp->db_ipbmap->i_sb, |
1584 | "dbAllocAny: unable to allocate blocks"); | 1584 | "dbAllocAny: unable to allocate blocks"); |
1585 | return -EIO; | 1585 | return -EIO; |
1586 | } | 1586 | } |
1587 | return (rc); | 1587 | return (rc); |
1588 | } | 1588 | } |
1589 | 1589 | ||
1590 | 1590 | ||
1591 | /* | 1591 | /* |
1592 | * NAME: dbFindCtl() | 1592 | * NAME: dbFindCtl() |
1593 | * | 1593 | * |
1594 | * FUNCTION: starting at a specified dmap control page level and block | 1594 | * FUNCTION: starting at a specified dmap control page level and block |
1595 | * number, search down the dmap control levels for a range of | 1595 | * number, search down the dmap control levels for a range of |
1596 | * contiguous free blocks large enough to satisfy an allocation | 1596 | * contiguous free blocks large enough to satisfy an allocation |
1597 | * request for the specified number of free blocks. | 1597 | * request for the specified number of free blocks. |
1598 | * | 1598 | * |
1599 | * if sufficient contiguous free blocks are found, this routine | 1599 | * if sufficient contiguous free blocks are found, this routine |
1600 | * returns the starting block number within a dmap page that | 1600 | * returns the starting block number within a dmap page that |
1601 | * contains or starts a range of contiguous free blocks that | 1601 | * contains or starts a range of contiguous free blocks that |
1602 | * is sufficient in size. | 1602 | * is sufficient in size. |
1603 | * | 1603 | * |
1604 | * PARAMETERS: | 1604 | * PARAMETERS: |
1605 | * bmp - pointer to bmap descriptor | 1605 | * bmp - pointer to bmap descriptor |
1606 | * level - starting dmap control page level. | 1606 | * level - starting dmap control page level. |
1607 | * l2nb - log2 number of contiguous free blocks desired. | 1607 | * l2nb - log2 number of contiguous free blocks desired. |
1608 | * *blkno - on entry, starting block number for conducting the search. | 1608 | * *blkno - on entry, starting block number for conducting the search. |
1609 | * on successful return, the first block within a dmap page | 1609 | * on successful return, the first block within a dmap page |
1610 | * that contains or starts a range of contiguous free blocks. | 1610 | * that contains or starts a range of contiguous free blocks. |
1611 | * | 1611 | * |
1612 | * RETURN VALUES: | 1612 | * RETURN VALUES: |
1613 | * 0 - success | 1613 | * 0 - success |
1614 | * -ENOSPC - insufficient disk resources | 1614 | * -ENOSPC - insufficient disk resources |
1615 | * -EIO - i/o error | 1615 | * -EIO - i/o error |
1616 | * | 1616 | * |
1617 | * serialization: IWRITE_LOCK(ipbmap) held on entry/exit; | 1617 | * serialization: IWRITE_LOCK(ipbmap) held on entry/exit; |
1618 | */ | 1618 | */ |
static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno)
{
	int rc, leafidx, lev;
	s64 b, lblkno;
	struct dmapctl *dcp;
	int budmin;
	struct metapage *mp;

	/* starting at the specified dmap control page level and block
	 * number, search down the dmap control levels for the starting
	 * block number of a dmap page that contains or starts off
	 * sufficient free blocks.
	 */
	for (lev = level, b = *blkno; lev >= 0; lev--) {
		/* get the buffer of the dmap control page for the block
		 * number and level (i.e. L0, L1, L2).
		 */
		lblkno = BLKTOCTL(b, bmp->db_l2nbperpage, lev);
		mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
		if (mp == NULL)
			return -EIO;
		dcp = (struct dmapctl *) mp->data;
		/* capture budmin now: the metapage is released before
		 * budmin is used to adjust the block number below.
		 */
		budmin = dcp->budmin;

		if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) {
			jfs_error(bmp->db_ipbmap->i_sb,
				  "dbFindCtl: Corrupt dmapctl page");
			release_metapage(mp);
			return -EIO;
		}

		/* search the tree within the dmap control page for
		 * sufficient free space.  if sufficient free space is found,
		 * dbFindLeaf() returns the index of the leaf at which
		 * free space was found.
		 */
		rc = dbFindLeaf((dmtree_t *) dcp, l2nb, &leafidx);

		/* release the buffer.
		 */
		release_metapage(mp);

		/* space found ?
		 */
		if (rc) {
			/* a miss below the starting level means an upper
			 * level advertised space that a lower level lacks.
			 */
			if (lev != level) {
				jfs_error(bmp->db_ipbmap->i_sb,
					  "dbFindCtl: dmap inconsistent");
				return -EIO;
			}
			return -ENOSPC;
		}

		/* adjust the block number to reflect the location within
		 * the dmap control page (i.e. the leaf) at which free
		 * space was found.
		 */
		b += (((s64) leafidx) << budmin);

		/* we stop the search at this dmap control page level if
		 * the number of blocks required is greater than or equal
		 * to the maximum number of blocks described at the next
		 * (lower) level.
		 */
		if (l2nb >= budmin)
			break;
	}

	*blkno = b;
	return (0);
}
1690 | 1690 | ||
1691 | 1691 | ||
1692 | /* | 1692 | /* |
1693 | * NAME: dbAllocCtl() | 1693 | * NAME: dbAllocCtl() |
1694 | * | 1694 | * |
1695 | * FUNCTION: attempt to allocate a specified number of contiguous | 1695 | * FUNCTION: attempt to allocate a specified number of contiguous |
1696 | * blocks starting within a specific dmap. | 1696 | * blocks starting within a specific dmap. |
1697 | * | 1697 | * |
1698 | * this routine is called by higher level routines that search | 1698 | * this routine is called by higher level routines that search |
1699 | * the dmap control pages above the actual dmaps for contiguous | 1699 | * the dmap control pages above the actual dmaps for contiguous |
1700 | * free space. the result of successful searches by these | 1700 | * free space. the result of successful searches by these |
1701 | * routines are the starting block numbers within dmaps, with | 1701 | * routines are the starting block numbers within dmaps, with |
1702 | * the dmaps themselves containing the desired contiguous free | 1702 | * the dmaps themselves containing the desired contiguous free |
1703 | * space or starting a contiguous free space of desired size | 1703 | * space or starting a contiguous free space of desired size |
1704 | * that is made up of the blocks of one or more dmaps. these | 1704 | * that is made up of the blocks of one or more dmaps. these |
1705 | * calls should not fail due to insufficient resources. | 1705 | * calls should not fail due to insufficient resources. |
1706 | * | 1706 | * |
1707 | * this routine is called in some cases where it is not known | 1707 | * this routine is called in some cases where it is not known |
1708 | * whether it will fail due to insufficient resources. more | 1708 | * whether it will fail due to insufficient resources. more |
1709 | * specifically, this occurs when allocating from an allocation | 1709 | * specifically, this occurs when allocating from an allocation |
1710 | * group whose size is equal to the number of blocks per dmap. | 1710 | * group whose size is equal to the number of blocks per dmap. |
1711 | * in this case, the dmap control pages are not examined prior | 1711 | * in this case, the dmap control pages are not examined prior |
1712 | * to calling this routine (to save pathlength) and the call | 1712 | * to calling this routine (to save pathlength) and the call |
1713 | * might fail. | 1713 | * might fail. |
1714 | * | 1714 | * |
1715 | * for a request size that fits within a dmap, this routine relies | 1715 | * for a request size that fits within a dmap, this routine relies |
1716 | * upon the dmap's dmtree to find the requested contiguous free | 1716 | * upon the dmap's dmtree to find the requested contiguous free |
1717 | * space. for request sizes that are larger than a dmap, the | 1717 | * space. for request sizes that are larger than a dmap, the |
1718 | * requested free space will start at the first block of the | 1718 | * requested free space will start at the first block of the |
1719 | * first dmap (i.e. blkno). | 1719 | * first dmap (i.e. blkno). |
1720 | * | 1720 | * |
1721 | * PARAMETERS: | 1721 | * PARAMETERS: |
1722 | * bmp - pointer to bmap descriptor | 1722 | * bmp - pointer to bmap descriptor |
1723 | * nblocks - actual number of contiguous free blocks to allocate. | 1723 | * nblocks - actual number of contiguous free blocks to allocate. |
1724 | * l2nb - log2 number of contiguous free blocks to allocate. | 1724 | * l2nb - log2 number of contiguous free blocks to allocate. |
1725 | * blkno - starting block number of the dmap to start the allocation | 1725 | * blkno - starting block number of the dmap to start the allocation |
1726 | * from. | 1726 | * from. |
1727 | * results - on successful return, set to the starting block number | 1727 | * results - on successful return, set to the starting block number |
1728 | * of the newly allocated range. | 1728 | * of the newly allocated range. |
1729 | * | 1729 | * |
1730 | * RETURN VALUES: | 1730 | * RETURN VALUES: |
1731 | * 0 - success | 1731 | * 0 - success |
1732 | * -ENOSPC - insufficient disk resources | 1732 | * -ENOSPC - insufficient disk resources |
1733 | * -EIO - i/o error | 1733 | * -EIO - i/o error |
1734 | * | 1734 | * |
1735 | * serialization: IWRITE_LOCK(ipbmap) held on entry/exit; | 1735 | * serialization: IWRITE_LOCK(ipbmap) held on entry/exit; |
1736 | */ | 1736 | */ |
1737 | static int | 1737 | static int |
1738 | dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results) | 1738 | dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results) |
1739 | { | 1739 | { |
1740 | int rc, nb; | 1740 | int rc, nb; |
1741 | s64 b, lblkno, n; | 1741 | s64 b, lblkno, n; |
1742 | struct metapage *mp; | 1742 | struct metapage *mp; |
1743 | struct dmap *dp; | 1743 | struct dmap *dp; |
1744 | 1744 | ||
1745 | /* check if the allocation request is confined to a single dmap. | 1745 | /* check if the allocation request is confined to a single dmap. |
1746 | */ | 1746 | */ |
1747 | if (l2nb <= L2BPERDMAP) { | 1747 | if (l2nb <= L2BPERDMAP) { |
1748 | /* get the buffer for the dmap. | 1748 | /* get the buffer for the dmap. |
1749 | */ | 1749 | */ |
1750 | lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage); | 1750 | lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage); |
1751 | mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0); | 1751 | mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0); |
1752 | if (mp == NULL) | 1752 | if (mp == NULL) |
1753 | return -EIO; | 1753 | return -EIO; |
1754 | dp = (struct dmap *) mp->data; | 1754 | dp = (struct dmap *) mp->data; |
1755 | 1755 | ||
1756 | /* try to allocate the blocks. | 1756 | /* try to allocate the blocks. |
1757 | */ | 1757 | */ |
1758 | rc = dbAllocDmapLev(bmp, dp, (int) nblocks, l2nb, results); | 1758 | rc = dbAllocDmapLev(bmp, dp, (int) nblocks, l2nb, results); |
1759 | if (rc == 0) | 1759 | if (rc == 0) |
1760 | mark_metapage_dirty(mp); | 1760 | mark_metapage_dirty(mp); |
1761 | 1761 | ||
1762 | release_metapage(mp); | 1762 | release_metapage(mp); |
1763 | 1763 | ||
1764 | return (rc); | 1764 | return (rc); |
1765 | } | 1765 | } |
1766 | 1766 | ||
1767 | /* allocation request involving multiple dmaps. it must start on | 1767 | /* allocation request involving multiple dmaps. it must start on |
1768 | * a dmap boundary. | 1768 | * a dmap boundary. |
1769 | */ | 1769 | */ |
1770 | assert((blkno & (BPERDMAP - 1)) == 0); | 1770 | assert((blkno & (BPERDMAP - 1)) == 0); |
1771 | 1771 | ||
1772 | /* allocate the blocks dmap by dmap. | 1772 | /* allocate the blocks dmap by dmap. |
1773 | */ | 1773 | */ |
1774 | for (n = nblocks, b = blkno; n > 0; n -= nb, b += nb) { | 1774 | for (n = nblocks, b = blkno; n > 0; n -= nb, b += nb) { |
1775 | /* get the buffer for the dmap. | 1775 | /* get the buffer for the dmap. |
1776 | */ | 1776 | */ |
1777 | lblkno = BLKTODMAP(b, bmp->db_l2nbperpage); | 1777 | lblkno = BLKTODMAP(b, bmp->db_l2nbperpage); |
1778 | mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0); | 1778 | mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0); |
1779 | if (mp == NULL) { | 1779 | if (mp == NULL) { |
1780 | rc = -EIO; | 1780 | rc = -EIO; |
1781 | goto backout; | 1781 | goto backout; |
1782 | } | 1782 | } |
1783 | dp = (struct dmap *) mp->data; | 1783 | dp = (struct dmap *) mp->data; |
1784 | 1784 | ||
1785 | /* the dmap better be all free. | 1785 | /* the dmap better be all free. |
1786 | */ | 1786 | */ |
1787 | if (dp->tree.stree[ROOT] != L2BPERDMAP) { | 1787 | if (dp->tree.stree[ROOT] != L2BPERDMAP) { |
1788 | release_metapage(mp); | 1788 | release_metapage(mp); |
1789 | jfs_error(bmp->db_ipbmap->i_sb, | 1789 | jfs_error(bmp->db_ipbmap->i_sb, |
1790 | "dbAllocCtl: the dmap is not all free"); | 1790 | "dbAllocCtl: the dmap is not all free"); |
1791 | rc = -EIO; | 1791 | rc = -EIO; |
1792 | goto backout; | 1792 | goto backout; |
1793 | } | 1793 | } |
1794 | 1794 | ||
1795 | /* determine how many blocks to allocate from this dmap. | 1795 | /* determine how many blocks to allocate from this dmap. |
1796 | */ | 1796 | */ |
1797 | nb = min(n, (s64)BPERDMAP); | 1797 | nb = min(n, (s64)BPERDMAP); |
1798 | 1798 | ||
1799 | /* allocate the blocks from the dmap. | 1799 | /* allocate the blocks from the dmap. |
1800 | */ | 1800 | */ |
1801 | if ((rc = dbAllocDmap(bmp, dp, b, nb))) { | 1801 | if ((rc = dbAllocDmap(bmp, dp, b, nb))) { |
1802 | release_metapage(mp); | 1802 | release_metapage(mp); |
1803 | goto backout; | 1803 | goto backout; |
1804 | } | 1804 | } |
1805 | 1805 | ||
1806 | /* write the buffer. | 1806 | /* write the buffer. |
1807 | */ | 1807 | */ |
1808 | write_metapage(mp); | 1808 | write_metapage(mp); |
1809 | } | 1809 | } |
1810 | 1810 | ||
1811 | /* set the results (starting block number) and return. | 1811 | /* set the results (starting block number) and return. |
1812 | */ | 1812 | */ |
1813 | *results = blkno; | 1813 | *results = blkno; |
1814 | return (0); | 1814 | return (0); |
1815 | 1815 | ||
1816 | /* something failed in handling an allocation request involving | 1816 | /* something failed in handling an allocation request involving |
1817 | * multiple dmaps. we'll try to clean up by backing out any | 1817 | * multiple dmaps. we'll try to clean up by backing out any |
1818 | * allocation that has already happened for this request. if | 1818 | * allocation that has already happened for this request. if |
1819 | * we fail in backing out the allocation, we'll mark the file | 1819 | * we fail in backing out the allocation, we'll mark the file |
1820 | * system to indicate that blocks have been leaked. | 1820 | * system to indicate that blocks have been leaked. |
1821 | */ | 1821 | */ |
1822 | backout: | 1822 | backout: |
1823 | 1823 | ||
1824 | /* try to backout the allocations dmap by dmap. | 1824 | /* try to backout the allocations dmap by dmap. |
1825 | */ | 1825 | */ |
1826 | for (n = nblocks - n, b = blkno; n > 0; | 1826 | for (n = nblocks - n, b = blkno; n > 0; |
1827 | n -= BPERDMAP, b += BPERDMAP) { | 1827 | n -= BPERDMAP, b += BPERDMAP) { |
1828 | /* get the buffer for this dmap. | 1828 | /* get the buffer for this dmap. |
1829 | */ | 1829 | */ |
1830 | lblkno = BLKTODMAP(b, bmp->db_l2nbperpage); | 1830 | lblkno = BLKTODMAP(b, bmp->db_l2nbperpage); |
1831 | mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0); | 1831 | mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0); |
1832 | if (mp == NULL) { | 1832 | if (mp == NULL) { |
1833 | /* could not back out. mark the file system | 1833 | /* could not back out. mark the file system |
1834 | * to indicate that we have leaked blocks. | 1834 | * to indicate that we have leaked blocks. |
1835 | */ | 1835 | */ |
1836 | jfs_error(bmp->db_ipbmap->i_sb, | 1836 | jfs_error(bmp->db_ipbmap->i_sb, |
1837 | "dbAllocCtl: I/O Error: Block Leakage."); | 1837 | "dbAllocCtl: I/O Error: Block Leakage."); |
1838 | continue; | 1838 | continue; |
1839 | } | 1839 | } |
1840 | dp = (struct dmap *) mp->data; | 1840 | dp = (struct dmap *) mp->data; |
1841 | 1841 | ||
1842 | /* free the blocks is this dmap. | 1842 | /* free the blocks is this dmap. |
1843 | */ | 1843 | */ |
1844 | if (dbFreeDmap(bmp, dp, b, BPERDMAP)) { | 1844 | if (dbFreeDmap(bmp, dp, b, BPERDMAP)) { |
1845 | /* could not back out. mark the file system | 1845 | /* could not back out. mark the file system |
1846 | * to indicate that we have leaked blocks. | 1846 | * to indicate that we have leaked blocks. |
1847 | */ | 1847 | */ |
1848 | release_metapage(mp); | 1848 | release_metapage(mp); |
1849 | jfs_error(bmp->db_ipbmap->i_sb, | 1849 | jfs_error(bmp->db_ipbmap->i_sb, |
1850 | "dbAllocCtl: Block Leakage."); | 1850 | "dbAllocCtl: Block Leakage."); |
1851 | continue; | 1851 | continue; |
1852 | } | 1852 | } |
1853 | 1853 | ||
1854 | /* write the buffer. | 1854 | /* write the buffer. |
1855 | */ | 1855 | */ |
1856 | write_metapage(mp); | 1856 | write_metapage(mp); |
1857 | } | 1857 | } |
1858 | 1858 | ||
1859 | return (rc); | 1859 | return (rc); |
1860 | } | 1860 | } |
1861 | 1861 | ||
1862 | 1862 | ||
1863 | /* | 1863 | /* |
1864 | * NAME: dbAllocDmapLev() | 1864 | * NAME: dbAllocDmapLev() |
1865 | * | 1865 | * |
1866 | * FUNCTION: attempt to allocate a specified number of contiguous blocks | 1866 | * FUNCTION: attempt to allocate a specified number of contiguous blocks |
1867 | * from a specified dmap. | 1867 | * from a specified dmap. |
1868 | * | 1868 | * |
1869 | * this routine checks if the contiguous blocks are available. | 1869 | * this routine checks if the contiguous blocks are available. |
1870 | * if so, nblocks of blocks are allocated; otherwise, ENOSPC is | 1870 | * if so, nblocks of blocks are allocated; otherwise, ENOSPC is |
1871 | * returned. | 1871 | * returned. |
1872 | * | 1872 | * |
1873 | * PARAMETERS: | 1873 | * PARAMETERS: |
1874 | * mp - pointer to bmap descriptor | 1874 | * mp - pointer to bmap descriptor |
1875 | * dp - pointer to dmap to attempt to allocate blocks from. | 1875 | * dp - pointer to dmap to attempt to allocate blocks from. |
1876 | * l2nb - log2 number of contiguous block desired. | 1876 | * l2nb - log2 number of contiguous block desired. |
1877 | * nblocks - actual number of contiguous block desired. | 1877 | * nblocks - actual number of contiguous block desired. |
1878 | * results - on successful return, set to the starting block number | 1878 | * results - on successful return, set to the starting block number |
1879 | * of the newly allocated range. | 1879 | * of the newly allocated range. |
1880 | * | 1880 | * |
1881 | * RETURN VALUES: | 1881 | * RETURN VALUES: |
1882 | * 0 - success | 1882 | * 0 - success |
1883 | * -ENOSPC - insufficient disk resources | 1883 | * -ENOSPC - insufficient disk resources |
1884 | * -EIO - i/o error | 1884 | * -EIO - i/o error |
1885 | * | 1885 | * |
1886 | * serialization: IREAD_LOCK(ipbmap), e.g., from dbAlloc(), or | 1886 | * serialization: IREAD_LOCK(ipbmap), e.g., from dbAlloc(), or |
1887 | * IWRITE_LOCK(ipbmap), e.g., dbAllocCtl(), held on entry/exit; | 1887 | * IWRITE_LOCK(ipbmap), e.g., dbAllocCtl(), held on entry/exit; |
1888 | */ | 1888 | */ |
1889 | static int | 1889 | static int |
1890 | dbAllocDmapLev(struct bmap * bmp, | 1890 | dbAllocDmapLev(struct bmap * bmp, |
1891 | struct dmap * dp, int nblocks, int l2nb, s64 * results) | 1891 | struct dmap * dp, int nblocks, int l2nb, s64 * results) |
1892 | { | 1892 | { |
1893 | s64 blkno; | 1893 | s64 blkno; |
1894 | int leafidx, rc; | 1894 | int leafidx, rc; |
1895 | 1895 | ||
1896 | /* can't be more than a dmaps worth of blocks */ | 1896 | /* can't be more than a dmaps worth of blocks */ |
1897 | assert(l2nb <= L2BPERDMAP); | 1897 | assert(l2nb <= L2BPERDMAP); |
1898 | 1898 | ||
1899 | /* search the tree within the dmap page for sufficient | 1899 | /* search the tree within the dmap page for sufficient |
1900 | * free space. if sufficient free space is found, dbFindLeaf() | 1900 | * free space. if sufficient free space is found, dbFindLeaf() |
1901 | * returns the index of the leaf at which free space was found. | 1901 | * returns the index of the leaf at which free space was found. |
1902 | */ | 1902 | */ |
1903 | if (dbFindLeaf((dmtree_t *) & dp->tree, l2nb, &leafidx)) | 1903 | if (dbFindLeaf((dmtree_t *) & dp->tree, l2nb, &leafidx)) |
1904 | return -ENOSPC; | 1904 | return -ENOSPC; |
1905 | 1905 | ||
1906 | /* determine the block number within the file system corresponding | 1906 | /* determine the block number within the file system corresponding |
1907 | * to the leaf at which free space was found. | 1907 | * to the leaf at which free space was found. |
1908 | */ | 1908 | */ |
1909 | blkno = le64_to_cpu(dp->start) + (leafidx << L2DBWORD); | 1909 | blkno = le64_to_cpu(dp->start) + (leafidx << L2DBWORD); |
1910 | 1910 | ||
1911 | /* if not all bits of the dmap word are free, get the starting | 1911 | /* if not all bits of the dmap word are free, get the starting |
1912 | * bit number within the dmap word of the required string of free | 1912 | * bit number within the dmap word of the required string of free |
1913 | * bits and adjust the block number with this value. | 1913 | * bits and adjust the block number with this value. |
1914 | */ | 1914 | */ |
1915 | if (dp->tree.stree[leafidx + LEAFIND] < BUDMIN) | 1915 | if (dp->tree.stree[leafidx + LEAFIND] < BUDMIN) |
1916 | blkno += dbFindBits(le32_to_cpu(dp->wmap[leafidx]), l2nb); | 1916 | blkno += dbFindBits(le32_to_cpu(dp->wmap[leafidx]), l2nb); |
1917 | 1917 | ||
1918 | /* allocate the blocks */ | 1918 | /* allocate the blocks */ |
1919 | if ((rc = dbAllocDmap(bmp, dp, blkno, nblocks)) == 0) | 1919 | if ((rc = dbAllocDmap(bmp, dp, blkno, nblocks)) == 0) |
1920 | *results = blkno; | 1920 | *results = blkno; |
1921 | 1921 | ||
1922 | return (rc); | 1922 | return (rc); |
1923 | } | 1923 | } |
1924 | 1924 | ||
1925 | 1925 | ||
1926 | /* | 1926 | /* |
1927 | * NAME: dbAllocDmap() | 1927 | * NAME: dbAllocDmap() |
1928 | * | 1928 | * |
1929 | * FUNCTION: adjust the disk allocation map to reflect the allocation | 1929 | * FUNCTION: adjust the disk allocation map to reflect the allocation |
1930 | * of a specified block range within a dmap. | 1930 | * of a specified block range within a dmap. |
1931 | * | 1931 | * |
1932 | * this routine allocates the specified blocks from the dmap | 1932 | * this routine allocates the specified blocks from the dmap |
1933 | * through a call to dbAllocBits(). if the allocation of the | 1933 | * through a call to dbAllocBits(). if the allocation of the |
1934 | * block range causes the maximum string of free blocks within | 1934 | * block range causes the maximum string of free blocks within |
1935 | * the dmap to change (i.e. the value of the root of the dmap's | 1935 | * the dmap to change (i.e. the value of the root of the dmap's |
1936 | * dmtree), this routine will cause this change to be reflected | 1936 | * dmtree), this routine will cause this change to be reflected |
1937 | * up through the appropriate levels of the dmap control pages | 1937 | * up through the appropriate levels of the dmap control pages |
1938 | * by a call to dbAdjCtl() for the L0 dmap control page that | 1938 | * by a call to dbAdjCtl() for the L0 dmap control page that |
1939 | * covers this dmap. | 1939 | * covers this dmap. |
1940 | * | 1940 | * |
1941 | * PARAMETERS: | 1941 | * PARAMETERS: |
1942 | * bmp - pointer to bmap descriptor | 1942 | * bmp - pointer to bmap descriptor |
1943 | * dp - pointer to dmap to allocate the block range from. | 1943 | * dp - pointer to dmap to allocate the block range from. |
1944 | * blkno - starting block number of the block to be allocated. | 1944 | * blkno - starting block number of the block to be allocated. |
1945 | * nblocks - number of blocks to be allocated. | 1945 | * nblocks - number of blocks to be allocated. |
1946 | * | 1946 | * |
1947 | * RETURN VALUES: | 1947 | * RETURN VALUES: |
1948 | * 0 - success | 1948 | * 0 - success |
1949 | * -EIO - i/o error | 1949 | * -EIO - i/o error |
1950 | * | 1950 | * |
1951 | * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; | 1951 | * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; |
1952 | */ | 1952 | */ |
1953 | static int dbAllocDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, | 1953 | static int dbAllocDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, |
1954 | int nblocks) | 1954 | int nblocks) |
1955 | { | 1955 | { |
1956 | s8 oldroot; | 1956 | s8 oldroot; |
1957 | int rc; | 1957 | int rc; |
1958 | 1958 | ||
1959 | /* save the current value of the root (i.e. maximum free string) | 1959 | /* save the current value of the root (i.e. maximum free string) |
1960 | * of the dmap tree. | 1960 | * of the dmap tree. |
1961 | */ | 1961 | */ |
1962 | oldroot = dp->tree.stree[ROOT]; | 1962 | oldroot = dp->tree.stree[ROOT]; |
1963 | 1963 | ||
1964 | /* allocate the specified (blocks) bits */ | 1964 | /* allocate the specified (blocks) bits */ |
1965 | dbAllocBits(bmp, dp, blkno, nblocks); | 1965 | dbAllocBits(bmp, dp, blkno, nblocks); |
1966 | 1966 | ||
1967 | /* if the root has not changed, done. */ | 1967 | /* if the root has not changed, done. */ |
1968 | if (dp->tree.stree[ROOT] == oldroot) | 1968 | if (dp->tree.stree[ROOT] == oldroot) |
1969 | return (0); | 1969 | return (0); |
1970 | 1970 | ||
1971 | /* root changed. bubble the change up to the dmap control pages. | 1971 | /* root changed. bubble the change up to the dmap control pages. |
1972 | * if the adjustment of the upper level control pages fails, | 1972 | * if the adjustment of the upper level control pages fails, |
1973 | * backout the bit allocation (thus making everything consistent). | 1973 | * backout the bit allocation (thus making everything consistent). |
1974 | */ | 1974 | */ |
1975 | if ((rc = dbAdjCtl(bmp, blkno, dp->tree.stree[ROOT], 1, 0))) | 1975 | if ((rc = dbAdjCtl(bmp, blkno, dp->tree.stree[ROOT], 1, 0))) |
1976 | dbFreeBits(bmp, dp, blkno, nblocks); | 1976 | dbFreeBits(bmp, dp, blkno, nblocks); |
1977 | 1977 | ||
1978 | return (rc); | 1978 | return (rc); |
1979 | } | 1979 | } |
1980 | 1980 | ||
1981 | 1981 | ||
1982 | /* | 1982 | /* |
1983 | * NAME: dbFreeDmap() | 1983 | * NAME: dbFreeDmap() |
1984 | * | 1984 | * |
1985 | * FUNCTION: adjust the disk allocation map to reflect the allocation | 1985 | * FUNCTION: adjust the disk allocation map to reflect the allocation |
1986 | * of a specified block range within a dmap. | 1986 | * of a specified block range within a dmap. |
1987 | * | 1987 | * |
1988 | * this routine frees the specified blocks from the dmap through | 1988 | * this routine frees the specified blocks from the dmap through |
1989 | * a call to dbFreeBits(). if the deallocation of the block range | 1989 | * a call to dbFreeBits(). if the deallocation of the block range |
1990 | * causes the maximum string of free blocks within the dmap to | 1990 | * causes the maximum string of free blocks within the dmap to |
1991 | * change (i.e. the value of the root of the dmap's dmtree), this | 1991 | * change (i.e. the value of the root of the dmap's dmtree), this |
1992 | * routine will cause this change to be reflected up through the | 1992 | * routine will cause this change to be reflected up through the |
1993 | * appropriate levels of the dmap control pages by a call to | 1993 | * appropriate levels of the dmap control pages by a call to |
1994 | * dbAdjCtl() for the L0 dmap control page that covers this dmap. | 1994 | * dbAdjCtl() for the L0 dmap control page that covers this dmap. |
1995 | * | 1995 | * |
1996 | * PARAMETERS: | 1996 | * PARAMETERS: |
1997 | * bmp - pointer to bmap descriptor | 1997 | * bmp - pointer to bmap descriptor |
1998 | * dp - pointer to dmap to free the block range from. | 1998 | * dp - pointer to dmap to free the block range from. |
1999 | * blkno - starting block number of the block to be freed. | 1999 | * blkno - starting block number of the block to be freed. |
2000 | * nblocks - number of blocks to be freed. | 2000 | * nblocks - number of blocks to be freed. |
2001 | * | 2001 | * |
2002 | * RETURN VALUES: | 2002 | * RETURN VALUES: |
2003 | * 0 - success | 2003 | * 0 - success |
2004 | * -EIO - i/o error | 2004 | * -EIO - i/o error |
2005 | * | 2005 | * |
2006 | * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; | 2006 | * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; |
2007 | */ | 2007 | */ |
2008 | static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, | 2008 | static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, |
2009 | int nblocks) | 2009 | int nblocks) |
2010 | { | 2010 | { |
2011 | s8 oldroot; | 2011 | s8 oldroot; |
2012 | int rc = 0, word; | 2012 | int rc = 0, word; |
2013 | 2013 | ||
2014 | /* save the current value of the root (i.e. maximum free string) | 2014 | /* save the current value of the root (i.e. maximum free string) |
2015 | * of the dmap tree. | 2015 | * of the dmap tree. |
2016 | */ | 2016 | */ |
2017 | oldroot = dp->tree.stree[ROOT]; | 2017 | oldroot = dp->tree.stree[ROOT]; |
2018 | 2018 | ||
2019 | /* free the specified (blocks) bits */ | 2019 | /* free the specified (blocks) bits */ |
2020 | rc = dbFreeBits(bmp, dp, blkno, nblocks); | 2020 | rc = dbFreeBits(bmp, dp, blkno, nblocks); |
2021 | 2021 | ||
2022 | /* if error or the root has not changed, done. */ | 2022 | /* if error or the root has not changed, done. */ |
2023 | if (rc || (dp->tree.stree[ROOT] == oldroot)) | 2023 | if (rc || (dp->tree.stree[ROOT] == oldroot)) |
2024 | return (rc); | 2024 | return (rc); |
2025 | 2025 | ||
2026 | /* root changed. bubble the change up to the dmap control pages. | 2026 | /* root changed. bubble the change up to the dmap control pages. |
2027 | * if the adjustment of the upper level control pages fails, | 2027 | * if the adjustment of the upper level control pages fails, |
2028 | * backout the deallocation. | 2028 | * backout the deallocation. |
2029 | */ | 2029 | */ |
2030 | if ((rc = dbAdjCtl(bmp, blkno, dp->tree.stree[ROOT], 0, 0))) { | 2030 | if ((rc = dbAdjCtl(bmp, blkno, dp->tree.stree[ROOT], 0, 0))) { |
2031 | word = (blkno & (BPERDMAP - 1)) >> L2DBWORD; | 2031 | word = (blkno & (BPERDMAP - 1)) >> L2DBWORD; |
2032 | 2032 | ||
2033 | /* as part of backing out the deallocation, we will have | 2033 | /* as part of backing out the deallocation, we will have |
2034 | * to back split the dmap tree if the deallocation caused | 2034 | * to back split the dmap tree if the deallocation caused |
2035 | * the freed blocks to become part of a larger binary buddy | 2035 | * the freed blocks to become part of a larger binary buddy |
2036 | * system. | 2036 | * system. |
2037 | */ | 2037 | */ |
2038 | if (dp->tree.stree[word] == NOFREE) | 2038 | if (dp->tree.stree[word] == NOFREE) |
2039 | dbBackSplit((dmtree_t *) & dp->tree, word); | 2039 | dbBackSplit((dmtree_t *) & dp->tree, word); |
2040 | 2040 | ||
2041 | dbAllocBits(bmp, dp, blkno, nblocks); | 2041 | dbAllocBits(bmp, dp, blkno, nblocks); |
2042 | } | 2042 | } |
2043 | 2043 | ||
2044 | return (rc); | 2044 | return (rc); |
2045 | } | 2045 | } |
2046 | 2046 | ||
2047 | 2047 | ||
2048 | /* | 2048 | /* |
2049 | * NAME: dbAllocBits() | 2049 | * NAME: dbAllocBits() |
2050 | * | 2050 | * |
2051 | * FUNCTION: allocate a specified block range from a dmap. | 2051 | * FUNCTION: allocate a specified block range from a dmap. |
2052 | * | 2052 | * |
2053 | * this routine updates the dmap to reflect the working | 2053 | * this routine updates the dmap to reflect the working |
2054 | * state allocation of the specified block range. it directly | 2054 | * state allocation of the specified block range. it directly |
2055 | * updates the bits of the working map and causes the adjustment | 2055 | * updates the bits of the working map and causes the adjustment |
2056 | * of the binary buddy system described by the dmap's dmtree | 2056 | * of the binary buddy system described by the dmap's dmtree |
2057 | * leaves to reflect the bits allocated. it also causes the | 2057 | * leaves to reflect the bits allocated. it also causes the |
2058 | * dmap's dmtree, as a whole, to reflect the allocated range. | 2058 | * dmap's dmtree, as a whole, to reflect the allocated range. |
2059 | * | 2059 | * |
2060 | * PARAMETERS: | 2060 | * PARAMETERS: |
2061 | * bmp - pointer to bmap descriptor | 2061 | * bmp - pointer to bmap descriptor |
2062 | * dp - pointer to dmap to allocate bits from. | 2062 | * dp - pointer to dmap to allocate bits from. |
2063 | * blkno - starting block number of the bits to be allocated. | 2063 | * blkno - starting block number of the bits to be allocated. |
2064 | * nblocks - number of bits to be allocated. | 2064 | * nblocks - number of bits to be allocated. |
2065 | * | 2065 | * |
2066 | * RETURN VALUES: none | 2066 | * RETURN VALUES: none |
2067 | * | 2067 | * |
2068 | * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; | 2068 | * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; |
2069 | */ | 2069 | */ |
2070 | static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno, | 2070 | static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno, |
2071 | int nblocks) | 2071 | int nblocks) |
2072 | { | 2072 | { |
2073 | int dbitno, word, rembits, nb, nwords, wbitno, nw, agno; | 2073 | int dbitno, word, rembits, nb, nwords, wbitno, nw, agno; |
2074 | dmtree_t *tp = (dmtree_t *) & dp->tree; | 2074 | dmtree_t *tp = (dmtree_t *) & dp->tree; |
2075 | int size; | 2075 | int size; |
2076 | s8 *leaf; | 2076 | s8 *leaf; |
2077 | 2077 | ||
2078 | /* pick up a pointer to the leaves of the dmap tree */ | 2078 | /* pick up a pointer to the leaves of the dmap tree */ |
2079 | leaf = dp->tree.stree + LEAFIND; | 2079 | leaf = dp->tree.stree + LEAFIND; |
2080 | 2080 | ||
2081 | /* determine the bit number and word within the dmap of the | 2081 | /* determine the bit number and word within the dmap of the |
2082 | * starting block. | 2082 | * starting block. |
2083 | */ | 2083 | */ |
2084 | dbitno = blkno & (BPERDMAP - 1); | 2084 | dbitno = blkno & (BPERDMAP - 1); |
2085 | word = dbitno >> L2DBWORD; | 2085 | word = dbitno >> L2DBWORD; |
2086 | 2086 | ||
2087 | /* block range better be within the dmap */ | 2087 | /* block range better be within the dmap */ |
2088 | assert(dbitno + nblocks <= BPERDMAP); | 2088 | assert(dbitno + nblocks <= BPERDMAP); |
2089 | 2089 | ||
2090 | /* allocate the bits of the dmap's words corresponding to the block | 2090 | /* allocate the bits of the dmap's words corresponding to the block |
2091 | * range. not all bits of the first and last words may be contained | 2091 | * range. not all bits of the first and last words may be contained |
2092 | * within the block range. if this is the case, we'll work against | 2092 | * within the block range. if this is the case, we'll work against |
2093 | * those words (i.e. partial first and/or last) on an individual basis | 2093 | * those words (i.e. partial first and/or last) on an individual basis |
2094 | * (a single pass), allocating the bits of interest by hand and | 2094 | * (a single pass), allocating the bits of interest by hand and |
2095 | * updating the leaf corresponding to the dmap word. a single pass | 2095 | * updating the leaf corresponding to the dmap word. a single pass |
2096 | * will be used for all dmap words fully contained within the | 2096 | * will be used for all dmap words fully contained within the |
2097 | * specified range. within this pass, the bits of all fully contained | 2097 | * specified range. within this pass, the bits of all fully contained |
2098 | * dmap words will be marked as free in a single shot and the leaves | 2098 | * dmap words will be marked as free in a single shot and the leaves |
2099 | * will be updated. a single leaf may describe the free space of | 2099 | * will be updated. a single leaf may describe the free space of |
2100 | * multiple dmap words, so we may update only a subset of the actual | 2100 | * multiple dmap words, so we may update only a subset of the actual |
2101 | * leaves corresponding to the dmap words of the block range. | 2101 | * leaves corresponding to the dmap words of the block range. |
2102 | */ | 2102 | */ |
2103 | for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) { | 2103 | for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) { |
2104 | /* determine the bit number within the word and | 2104 | /* determine the bit number within the word and |
2105 | * the number of bits within the word. | 2105 | * the number of bits within the word. |
2106 | */ | 2106 | */ |
2107 | wbitno = dbitno & (DBWORD - 1); | 2107 | wbitno = dbitno & (DBWORD - 1); |
2108 | nb = min(rembits, DBWORD - wbitno); | 2108 | nb = min(rembits, DBWORD - wbitno); |
2109 | 2109 | ||
2110 | /* check if only part of a word is to be allocated. | 2110 | /* check if only part of a word is to be allocated. |
2111 | */ | 2111 | */ |
2112 | if (nb < DBWORD) { | 2112 | if (nb < DBWORD) { |
2113 | /* allocate (set to 1) the appropriate bits within | 2113 | /* allocate (set to 1) the appropriate bits within |
2114 | * this dmap word. | 2114 | * this dmap word. |
2115 | */ | 2115 | */ |
2116 | dp->wmap[word] |= cpu_to_le32(ONES << (DBWORD - nb) | 2116 | dp->wmap[word] |= cpu_to_le32(ONES << (DBWORD - nb) |
2117 | >> wbitno); | 2117 | >> wbitno); |
2118 | 2118 | ||
2119 | /* update the leaf for this dmap word. in addition | 2119 | /* update the leaf for this dmap word. in addition |
2120 | * to setting the leaf value to the binary buddy max | 2120 | * to setting the leaf value to the binary buddy max |
2121 | * of the updated dmap word, dbSplit() will split | 2121 | * of the updated dmap word, dbSplit() will split |
2122 | * the binary system of the leaves if need be. | 2122 | * the binary system of the leaves if need be. |
2123 | */ | 2123 | */ |
2124 | dbSplit(tp, word, BUDMIN, | 2124 | dbSplit(tp, word, BUDMIN, |
2125 | dbMaxBud((u8 *) & dp->wmap[word])); | 2125 | dbMaxBud((u8 *) & dp->wmap[word])); |
2126 | 2126 | ||
2127 | word += 1; | 2127 | word += 1; |
2128 | } else { | 2128 | } else { |
2129 | /* one or more dmap words are fully contained | 2129 | /* one or more dmap words are fully contained |
2130 | * within the block range. determine how many | 2130 | * within the block range. determine how many |
2131 | * words and allocate (set to 1) the bits of these | 2131 | * words and allocate (set to 1) the bits of these |
2132 | * words. | 2132 | * words. |
2133 | */ | 2133 | */ |
2134 | nwords = rembits >> L2DBWORD; | 2134 | nwords = rembits >> L2DBWORD; |
2135 | memset(&dp->wmap[word], (int) ONES, nwords * 4); | 2135 | memset(&dp->wmap[word], (int) ONES, nwords * 4); |
2136 | 2136 | ||
2137 | /* determine how many bits. | 2137 | /* determine how many bits. |
2138 | */ | 2138 | */ |
2139 | nb = nwords << L2DBWORD; | 2139 | nb = nwords << L2DBWORD; |
2140 | 2140 | ||
2141 | /* now update the appropriate leaves to reflect | 2141 | /* now update the appropriate leaves to reflect |
2142 | * the allocated words. | 2142 | * the allocated words. |
2143 | */ | 2143 | */ |
2144 | for (; nwords > 0; nwords -= nw) { | 2144 | for (; nwords > 0; nwords -= nw) { |
2145 | if (leaf[word] < BUDMIN) { | 2145 | if (leaf[word] < BUDMIN) { |
2146 | jfs_error(bmp->db_ipbmap->i_sb, | 2146 | jfs_error(bmp->db_ipbmap->i_sb, |
2147 | "dbAllocBits: leaf page " | 2147 | "dbAllocBits: leaf page " |
2148 | "corrupt"); | 2148 | "corrupt"); |
2149 | break; | 2149 | break; |
2150 | } | 2150 | } |
2151 | 2151 | ||
2152 | /* determine what the leaf value should be | 2152 | /* determine what the leaf value should be |
2153 | * updated to as the minimum of the l2 number | 2153 | * updated to as the minimum of the l2 number |
2154 | * of bits being allocated and the l2 number | 2154 | * of bits being allocated and the l2 number |
2155 | * of bits currently described by this leaf. | 2155 | * of bits currently described by this leaf. |
2156 | */ | 2156 | */ |
2157 | size = min((int)leaf[word], NLSTOL2BSZ(nwords)); | 2157 | size = min((int)leaf[word], NLSTOL2BSZ(nwords)); |
2158 | 2158 | ||
2159 | /* update the leaf to reflect the allocation. | 2159 | /* update the leaf to reflect the allocation. |
2160 | * in addition to setting the leaf value to | 2160 | * in addition to setting the leaf value to |
2161 | * NOFREE, dbSplit() will split the binary | 2161 | * NOFREE, dbSplit() will split the binary |
2162 | * system of the leaves to reflect the current | 2162 | * system of the leaves to reflect the current |
2163 | * allocation (size). | 2163 | * allocation (size). |
2164 | */ | 2164 | */ |
2165 | dbSplit(tp, word, size, NOFREE); | 2165 | dbSplit(tp, word, size, NOFREE); |
2166 | 2166 | ||
2167 | /* get the number of dmap words handled */ | 2167 | /* get the number of dmap words handled */ |
2168 | nw = BUDSIZE(size, BUDMIN); | 2168 | nw = BUDSIZE(size, BUDMIN); |
2169 | word += nw; | 2169 | word += nw; |
2170 | } | 2170 | } |
2171 | } | 2171 | } |
2172 | } | 2172 | } |
2173 | 2173 | ||
2174 | /* update the free count for this dmap */ | 2174 | /* update the free count for this dmap */ |
2175 | dp->nfree = cpu_to_le32(le32_to_cpu(dp->nfree) - nblocks); | 2175 | le32_add_cpu(&dp->nfree, -nblocks); |
2176 | 2176 | ||
2177 | BMAP_LOCK(bmp); | 2177 | BMAP_LOCK(bmp); |
2178 | 2178 | ||
2179 | /* if this allocation group is completely free, | 2179 | /* if this allocation group is completely free, |
2180 | * update the maximum allocation group number if this allocation | 2180 | * update the maximum allocation group number if this allocation |
2181 | * group is the new max. | 2181 | * group is the new max. |
2182 | */ | 2182 | */ |
2183 | agno = blkno >> bmp->db_agl2size; | 2183 | agno = blkno >> bmp->db_agl2size; |
2184 | if (agno > bmp->db_maxag) | 2184 | if (agno > bmp->db_maxag) |
2185 | bmp->db_maxag = agno; | 2185 | bmp->db_maxag = agno; |
2186 | 2186 | ||
2187 | /* update the free count for the allocation group and map */ | 2187 | /* update the free count for the allocation group and map */ |
2188 | bmp->db_agfree[agno] -= nblocks; | 2188 | bmp->db_agfree[agno] -= nblocks; |
2189 | bmp->db_nfree -= nblocks; | 2189 | bmp->db_nfree -= nblocks; |
2190 | 2190 | ||
2191 | BMAP_UNLOCK(bmp); | 2191 | BMAP_UNLOCK(bmp); |
2192 | } | 2192 | } |
2193 | 2193 | ||
2194 | 2194 | ||
2195 | /* | 2195 | /* |
2196 | * NAME: dbFreeBits() | 2196 | * NAME: dbFreeBits() |
2197 | * | 2197 | * |
2198 | * FUNCTION: free a specified block range from a dmap. | 2198 | * FUNCTION: free a specified block range from a dmap. |
2199 | * | 2199 | * |
2200 | * this routine updates the dmap to reflect the working | 2200 | * this routine updates the dmap to reflect the working |
2201 | * state allocation of the specified block range. it directly | 2201 | * state allocation of the specified block range. it directly |
2202 | * updates the bits of the working map and causes the adjustment | 2202 | * updates the bits of the working map and causes the adjustment |
2203 | * of the binary buddy system described by the dmap's dmtree | 2203 | * of the binary buddy system described by the dmap's dmtree |
2204 | * leaves to reflect the bits freed. it also causes the dmap's | 2204 | * leaves to reflect the bits freed. it also causes the dmap's |
2205 | * dmtree, as a whole, to reflect the deallocated range. | 2205 | * dmtree, as a whole, to reflect the deallocated range. |
2206 | * | 2206 | * |
2207 | * PARAMETERS: | 2207 | * PARAMETERS: |
2208 | * bmp - pointer to bmap descriptor | 2208 | * bmp - pointer to bmap descriptor |
2209 | * dp - pointer to dmap to free bits from. | 2209 | * dp - pointer to dmap to free bits from. |
2210 | * blkno - starting block number of the bits to be freed. | 2210 | * blkno - starting block number of the bits to be freed. |
2211 | * nblocks - number of bits to be freed. | 2211 | * nblocks - number of bits to be freed. |
2212 | * | 2212 | * |
2213 | * RETURN VALUES: 0 for success | 2213 | * RETURN VALUES: 0 for success |
2214 | * | 2214 | * |
2215 | * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; | 2215 | * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; |
2216 | */ | 2216 | */ |
2217 | static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno, | 2217 | static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno, |
2218 | int nblocks) | 2218 | int nblocks) |
2219 | { | 2219 | { |
2220 | int dbitno, word, rembits, nb, nwords, wbitno, nw, agno; | 2220 | int dbitno, word, rembits, nb, nwords, wbitno, nw, agno; |
2221 | dmtree_t *tp = (dmtree_t *) & dp->tree; | 2221 | dmtree_t *tp = (dmtree_t *) & dp->tree; |
2222 | int rc = 0; | 2222 | int rc = 0; |
2223 | int size; | 2223 | int size; |
2224 | 2224 | ||
2225 | /* determine the bit number and word within the dmap of the | 2225 | /* determine the bit number and word within the dmap of the |
2226 | * starting block. | 2226 | * starting block. |
2227 | */ | 2227 | */ |
2228 | dbitno = blkno & (BPERDMAP - 1); | 2228 | dbitno = blkno & (BPERDMAP - 1); |
2229 | word = dbitno >> L2DBWORD; | 2229 | word = dbitno >> L2DBWORD; |
2230 | 2230 | ||
2231 | /* block range better be within the dmap. | 2231 | /* block range better be within the dmap. |
2232 | */ | 2232 | */ |
2233 | assert(dbitno + nblocks <= BPERDMAP); | 2233 | assert(dbitno + nblocks <= BPERDMAP); |
2234 | 2234 | ||
2235 | /* free the bits of the dmaps words corresponding to the block range. | 2235 | /* free the bits of the dmaps words corresponding to the block range. |
2236 | * not all bits of the first and last words may be contained within | 2236 | * not all bits of the first and last words may be contained within |
2237 | * the block range. if this is the case, we'll work against those | 2237 | * the block range. if this is the case, we'll work against those |
2238 | * words (i.e. partial first and/or last) on an individual basis | 2238 | * words (i.e. partial first and/or last) on an individual basis |
2239 | * (a single pass), freeing the bits of interest by hand and updating | 2239 | * (a single pass), freeing the bits of interest by hand and updating |
2240 | * the leaf corresponding to the dmap word. a single pass will be used | 2240 | * the leaf corresponding to the dmap word. a single pass will be used |
2241 | * for all dmap words fully contained within the specified range. | 2241 | * for all dmap words fully contained within the specified range. |
2242 | * within this pass, the bits of all fully contained dmap words will | 2242 | * within this pass, the bits of all fully contained dmap words will |
2243 | * be marked as free in a single shot and the leaves will be updated. a | 2243 | * be marked as free in a single shot and the leaves will be updated. a |
2244 | * single leaf may describe the free space of multiple dmap words, | 2244 | * single leaf may describe the free space of multiple dmap words, |
2245 | * so we may update only a subset of the actual leaves corresponding | 2245 | * so we may update only a subset of the actual leaves corresponding |
2246 | * to the dmap words of the block range. | 2246 | * to the dmap words of the block range. |
2247 | * | 2247 | * |
2248 | * dbJoin() is used to update leaf values and will join the binary | 2248 | * dbJoin() is used to update leaf values and will join the binary |
2249 | * buddy system of the leaves if the new leaf values indicate this | 2249 | * buddy system of the leaves if the new leaf values indicate this |
2250 | * should be done. | 2250 | * should be done. |
2251 | */ | 2251 | */ |
2252 | for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) { | 2252 | for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) { |
2253 | /* determine the bit number within the word and | 2253 | /* determine the bit number within the word and |
2254 | * the number of bits within the word. | 2254 | * the number of bits within the word. |
2255 | */ | 2255 | */ |
2256 | wbitno = dbitno & (DBWORD - 1); | 2256 | wbitno = dbitno & (DBWORD - 1); |
2257 | nb = min(rembits, DBWORD - wbitno); | 2257 | nb = min(rembits, DBWORD - wbitno); |
2258 | 2258 | ||
2259 | /* check if only part of a word is to be freed. | 2259 | /* check if only part of a word is to be freed. |
2260 | */ | 2260 | */ |
2261 | if (nb < DBWORD) { | 2261 | if (nb < DBWORD) { |
2262 | /* free (zero) the appropriate bits within this | 2262 | /* free (zero) the appropriate bits within this |
2263 | * dmap word. | 2263 | * dmap word. |
2264 | */ | 2264 | */ |
2265 | dp->wmap[word] &= | 2265 | dp->wmap[word] &= |
2266 | cpu_to_le32(~(ONES << (DBWORD - nb) | 2266 | cpu_to_le32(~(ONES << (DBWORD - nb) |
2267 | >> wbitno)); | 2267 | >> wbitno)); |
2268 | 2268 | ||
2269 | /* update the leaf for this dmap word. | 2269 | /* update the leaf for this dmap word. |
2270 | */ | 2270 | */ |
2271 | rc = dbJoin(tp, word, | 2271 | rc = dbJoin(tp, word, |
2272 | dbMaxBud((u8 *) & dp->wmap[word])); | 2272 | dbMaxBud((u8 *) & dp->wmap[word])); |
2273 | if (rc) | 2273 | if (rc) |
2274 | return rc; | 2274 | return rc; |
2275 | 2275 | ||
2276 | word += 1; | 2276 | word += 1; |
2277 | } else { | 2277 | } else { |
2278 | /* one or more dmap words are fully contained | 2278 | /* one or more dmap words are fully contained |
2279 | * within the block range. determine how many | 2279 | * within the block range. determine how many |
2280 | * words and free (zero) the bits of these words. | 2280 | * words and free (zero) the bits of these words. |
2281 | */ | 2281 | */ |
2282 | nwords = rembits >> L2DBWORD; | 2282 | nwords = rembits >> L2DBWORD; |
2283 | memset(&dp->wmap[word], 0, nwords * 4); | 2283 | memset(&dp->wmap[word], 0, nwords * 4); |
2284 | 2284 | ||
2285 | /* determine how many bits. | 2285 | /* determine how many bits. |
2286 | */ | 2286 | */ |
2287 | nb = nwords << L2DBWORD; | 2287 | nb = nwords << L2DBWORD; |
2288 | 2288 | ||
2289 | /* now update the appropriate leaves to reflect | 2289 | /* now update the appropriate leaves to reflect |
2290 | * the freed words. | 2290 | * the freed words. |
2291 | */ | 2291 | */ |
2292 | for (; nwords > 0; nwords -= nw) { | 2292 | for (; nwords > 0; nwords -= nw) { |
2293 | /* determine what the leaf value should be | 2293 | /* determine what the leaf value should be |
2294 | * updated to as the minimum of the l2 number | 2294 | * updated to as the minimum of the l2 number |
2295 | * of bits being freed and the l2 (max) number | 2295 | * of bits being freed and the l2 (max) number |
2296 | * of bits that can be described by this leaf. | 2296 | * of bits that can be described by this leaf. |
2297 | */ | 2297 | */ |
2298 | size = | 2298 | size = |
2299 | min(LITOL2BSZ | 2299 | min(LITOL2BSZ |
2300 | (word, L2LPERDMAP, BUDMIN), | 2300 | (word, L2LPERDMAP, BUDMIN), |
2301 | NLSTOL2BSZ(nwords)); | 2301 | NLSTOL2BSZ(nwords)); |
2302 | 2302 | ||
2303 | /* update the leaf. | 2303 | /* update the leaf. |
2304 | */ | 2304 | */ |
2305 | rc = dbJoin(tp, word, size); | 2305 | rc = dbJoin(tp, word, size); |
2306 | if (rc) | 2306 | if (rc) |
2307 | return rc; | 2307 | return rc; |
2308 | 2308 | ||
2309 | /* get the number of dmap words handled. | 2309 | /* get the number of dmap words handled. |
2310 | */ | 2310 | */ |
2311 | nw = BUDSIZE(size, BUDMIN); | 2311 | nw = BUDSIZE(size, BUDMIN); |
2312 | word += nw; | 2312 | word += nw; |
2313 | } | 2313 | } |
2314 | } | 2314 | } |
2315 | } | 2315 | } |
2316 | 2316 | ||
2317 | /* update the free count for this dmap. | 2317 | /* update the free count for this dmap. |
2318 | */ | 2318 | */ |
2319 | dp->nfree = cpu_to_le32(le32_to_cpu(dp->nfree) + nblocks); | 2319 | le32_add_cpu(&dp->nfree, nblocks); |
2320 | 2320 | ||
2321 | BMAP_LOCK(bmp); | 2321 | BMAP_LOCK(bmp); |
2322 | 2322 | ||
2323 | /* update the free count for the allocation group and | 2323 | /* update the free count for the allocation group and |
2324 | * map. | 2324 | * map. |
2325 | */ | 2325 | */ |
2326 | agno = blkno >> bmp->db_agl2size; | 2326 | agno = blkno >> bmp->db_agl2size; |
2327 | bmp->db_nfree += nblocks; | 2327 | bmp->db_nfree += nblocks; |
2328 | bmp->db_agfree[agno] += nblocks; | 2328 | bmp->db_agfree[agno] += nblocks; |
2329 | 2329 | ||
2330 | /* check if this allocation group is not completely free and | 2330 | /* check if this allocation group is not completely free and |
2331 | * if it is currently the maximum (rightmost) allocation group. | 2331 | * if it is currently the maximum (rightmost) allocation group. |
2332 | * if so, establish the new maximum allocation group number by | 2332 | * if so, establish the new maximum allocation group number by |
2333 | * searching left for the first allocation group with allocation. | 2333 | * searching left for the first allocation group with allocation. |
2334 | */ | 2334 | */ |
2335 | if ((bmp->db_agfree[agno] == bmp->db_agsize && agno == bmp->db_maxag) || | 2335 | if ((bmp->db_agfree[agno] == bmp->db_agsize && agno == bmp->db_maxag) || |
2336 | (agno == bmp->db_numag - 1 && | 2336 | (agno == bmp->db_numag - 1 && |
2337 | bmp->db_agfree[agno] == (bmp-> db_mapsize & (BPERDMAP - 1)))) { | 2337 | bmp->db_agfree[agno] == (bmp-> db_mapsize & (BPERDMAP - 1)))) { |
2338 | while (bmp->db_maxag > 0) { | 2338 | while (bmp->db_maxag > 0) { |
2339 | bmp->db_maxag -= 1; | 2339 | bmp->db_maxag -= 1; |
2340 | if (bmp->db_agfree[bmp->db_maxag] != | 2340 | if (bmp->db_agfree[bmp->db_maxag] != |
2341 | bmp->db_agsize) | 2341 | bmp->db_agsize) |
2342 | break; | 2342 | break; |
2343 | } | 2343 | } |
2344 | 2344 | ||
2345 | /* re-establish the allocation group preference if the | 2345 | /* re-establish the allocation group preference if the |
2346 | * current preference is right of the maximum allocation | 2346 | * current preference is right of the maximum allocation |
2347 | * group. | 2347 | * group. |
2348 | */ | 2348 | */ |
2349 | if (bmp->db_agpref > bmp->db_maxag) | 2349 | if (bmp->db_agpref > bmp->db_maxag) |
2350 | bmp->db_agpref = bmp->db_maxag; | 2350 | bmp->db_agpref = bmp->db_maxag; |
2351 | } | 2351 | } |
2352 | 2352 | ||
2353 | BMAP_UNLOCK(bmp); | 2353 | BMAP_UNLOCK(bmp); |
2354 | 2354 | ||
2355 | return 0; | 2355 | return 0; |
2356 | } | 2356 | } |
2357 | 2357 | ||
2358 | 2358 | ||
2359 | /* | 2359 | /* |
2360 | * NAME: dbAdjCtl() | 2360 | * NAME: dbAdjCtl() |
2361 | * | 2361 | * |
2362 | * FUNCTION: adjust a dmap control page at a specified level to reflect | 2362 | * FUNCTION: adjust a dmap control page at a specified level to reflect |
2363 | * the change in a lower level dmap or dmap control page's | 2363 | * the change in a lower level dmap or dmap control page's |
2364 | * maximum string of free blocks (i.e. a change in the root | 2364 | * maximum string of free blocks (i.e. a change in the root |
2365 | * of the lower level object's dmtree) due to the allocation | 2365 | * of the lower level object's dmtree) due to the allocation |
2366 | * or deallocation of a range of blocks with a single dmap. | 2366 | * or deallocation of a range of blocks with a single dmap. |
2367 | * | 2367 | * |
2368 | * on entry, this routine is provided with the new value of | 2368 | * on entry, this routine is provided with the new value of |
2369 | * the lower level dmap or dmap control page root and the | 2369 | * the lower level dmap or dmap control page root and the |
2370 | * starting block number of the block range whose allocation | 2370 | * starting block number of the block range whose allocation |
2371 | * or deallocation resulted in the root change. this range | 2371 | * or deallocation resulted in the root change. this range |
 * is represented by a single leaf of the current dmapctl
2373 | * and the leaf will be updated with this value, possibly | 2373 | * and the leaf will be updated with this value, possibly |
2374 | * causing a binary buddy system within the leaves to be | 2374 | * causing a binary buddy system within the leaves to be |
2375 | * split or joined. the update may also cause the dmapctl's | 2375 | * split or joined. the update may also cause the dmapctl's |
2376 | * dmtree to be updated. | 2376 | * dmtree to be updated. |
2377 | * | 2377 | * |
2378 | * if the adjustment of the dmap control page, itself, causes its | 2378 | * if the adjustment of the dmap control page, itself, causes its |
2379 | * root to change, this change will be bubbled up to the next dmap | 2379 | * root to change, this change will be bubbled up to the next dmap |
2380 | * control level by a recursive call to this routine, specifying | 2380 | * control level by a recursive call to this routine, specifying |
2381 | * the new root value and the next dmap control page level to | 2381 | * the new root value and the next dmap control page level to |
2382 | * be adjusted. | 2382 | * be adjusted. |
2383 | * PARAMETERS: | 2383 | * PARAMETERS: |
2384 | * bmp - pointer to bmap descriptor | 2384 | * bmp - pointer to bmap descriptor |
2385 | * blkno - the first block of a block range within a dmap. it is | 2385 | * blkno - the first block of a block range within a dmap. it is |
2386 | * the allocation or deallocation of this block range that | 2386 | * the allocation or deallocation of this block range that |
2387 | * requires the dmap control page to be adjusted. | 2387 | * requires the dmap control page to be adjusted. |
2388 | * newval - the new value of the lower level dmap or dmap control | 2388 | * newval - the new value of the lower level dmap or dmap control |
2389 | * page root. | 2389 | * page root. |
2390 | * alloc - 'true' if adjustment is due to an allocation. | 2390 | * alloc - 'true' if adjustment is due to an allocation. |
2391 | * level - current level of dmap control page (i.e. L0, L1, L2) to | 2391 | * level - current level of dmap control page (i.e. L0, L1, L2) to |
2392 | * be adjusted. | 2392 | * be adjusted. |
2393 | * | 2393 | * |
2394 | * RETURN VALUES: | 2394 | * RETURN VALUES: |
2395 | * 0 - success | 2395 | * 0 - success |
2396 | * -EIO - i/o error | 2396 | * -EIO - i/o error |
2397 | * | 2397 | * |
2398 | * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; | 2398 | * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; |
2399 | */ | 2399 | */ |
2400 | static int | 2400 | static int |
2401 | dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level) | 2401 | dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level) |
2402 | { | 2402 | { |
2403 | struct metapage *mp; | 2403 | struct metapage *mp; |
2404 | s8 oldroot; | 2404 | s8 oldroot; |
2405 | int oldval; | 2405 | int oldval; |
2406 | s64 lblkno; | 2406 | s64 lblkno; |
2407 | struct dmapctl *dcp; | 2407 | struct dmapctl *dcp; |
2408 | int rc, leafno, ti; | 2408 | int rc, leafno, ti; |
2409 | 2409 | ||
2410 | /* get the buffer for the dmap control page for the specified | 2410 | /* get the buffer for the dmap control page for the specified |
2411 | * block number and control page level. | 2411 | * block number and control page level. |
2412 | */ | 2412 | */ |
2413 | lblkno = BLKTOCTL(blkno, bmp->db_l2nbperpage, level); | 2413 | lblkno = BLKTOCTL(blkno, bmp->db_l2nbperpage, level); |
2414 | mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0); | 2414 | mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0); |
2415 | if (mp == NULL) | 2415 | if (mp == NULL) |
2416 | return -EIO; | 2416 | return -EIO; |
2417 | dcp = (struct dmapctl *) mp->data; | 2417 | dcp = (struct dmapctl *) mp->data; |
2418 | 2418 | ||
2419 | if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) { | 2419 | if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) { |
2420 | jfs_error(bmp->db_ipbmap->i_sb, | 2420 | jfs_error(bmp->db_ipbmap->i_sb, |
2421 | "dbAdjCtl: Corrupt dmapctl page"); | 2421 | "dbAdjCtl: Corrupt dmapctl page"); |
2422 | release_metapage(mp); | 2422 | release_metapage(mp); |
2423 | return -EIO; | 2423 | return -EIO; |
2424 | } | 2424 | } |
2425 | 2425 | ||
2426 | /* determine the leaf number corresponding to the block and | 2426 | /* determine the leaf number corresponding to the block and |
2427 | * the index within the dmap control tree. | 2427 | * the index within the dmap control tree. |
2428 | */ | 2428 | */ |
2429 | leafno = BLKTOCTLLEAF(blkno, dcp->budmin); | 2429 | leafno = BLKTOCTLLEAF(blkno, dcp->budmin); |
2430 | ti = leafno + le32_to_cpu(dcp->leafidx); | 2430 | ti = leafno + le32_to_cpu(dcp->leafidx); |
2431 | 2431 | ||
2432 | /* save the current leaf value and the current root level (i.e. | 2432 | /* save the current leaf value and the current root level (i.e. |
2433 | * maximum l2 free string described by this dmapctl). | 2433 | * maximum l2 free string described by this dmapctl). |
2434 | */ | 2434 | */ |
2435 | oldval = dcp->stree[ti]; | 2435 | oldval = dcp->stree[ti]; |
2436 | oldroot = dcp->stree[ROOT]; | 2436 | oldroot = dcp->stree[ROOT]; |
2437 | 2437 | ||
2438 | /* check if this is a control page update for an allocation. | 2438 | /* check if this is a control page update for an allocation. |
2439 | * if so, update the leaf to reflect the new leaf value using | 2439 | * if so, update the leaf to reflect the new leaf value using |
2440 | * dbSplit(); otherwise (deallocation), use dbJoin() to udpate | 2440 | * dbSplit(); otherwise (deallocation), use dbJoin() to udpate |
2441 | * the leaf with the new value. in addition to updating the | 2441 | * the leaf with the new value. in addition to updating the |
2442 | * leaf, dbSplit() will also split the binary buddy system of | 2442 | * leaf, dbSplit() will also split the binary buddy system of |
2443 | * the leaves, if required, and bubble new values within the | 2443 | * the leaves, if required, and bubble new values within the |
2444 | * dmapctl tree, if required. similarly, dbJoin() will join | 2444 | * dmapctl tree, if required. similarly, dbJoin() will join |
2445 | * the binary buddy system of leaves and bubble new values up | 2445 | * the binary buddy system of leaves and bubble new values up |
2446 | * the dmapctl tree as required by the new leaf value. | 2446 | * the dmapctl tree as required by the new leaf value. |
2447 | */ | 2447 | */ |
2448 | if (alloc) { | 2448 | if (alloc) { |
2449 | /* check if we are in the middle of a binary buddy | 2449 | /* check if we are in the middle of a binary buddy |
2450 | * system. this happens when we are performing the | 2450 | * system. this happens when we are performing the |
2451 | * first allocation out of an allocation group that | 2451 | * first allocation out of an allocation group that |
2452 | * is part (not the first part) of a larger binary | 2452 | * is part (not the first part) of a larger binary |
2453 | * buddy system. if we are in the middle, back split | 2453 | * buddy system. if we are in the middle, back split |
2454 | * the system prior to calling dbSplit() which assumes | 2454 | * the system prior to calling dbSplit() which assumes |
2455 | * that it is at the front of a binary buddy system. | 2455 | * that it is at the front of a binary buddy system. |
2456 | */ | 2456 | */ |
2457 | if (oldval == NOFREE) { | 2457 | if (oldval == NOFREE) { |
2458 | rc = dbBackSplit((dmtree_t *) dcp, leafno); | 2458 | rc = dbBackSplit((dmtree_t *) dcp, leafno); |
2459 | if (rc) | 2459 | if (rc) |
2460 | return rc; | 2460 | return rc; |
2461 | oldval = dcp->stree[ti]; | 2461 | oldval = dcp->stree[ti]; |
2462 | } | 2462 | } |
2463 | dbSplit((dmtree_t *) dcp, leafno, dcp->budmin, newval); | 2463 | dbSplit((dmtree_t *) dcp, leafno, dcp->budmin, newval); |
2464 | } else { | 2464 | } else { |
2465 | rc = dbJoin((dmtree_t *) dcp, leafno, newval); | 2465 | rc = dbJoin((dmtree_t *) dcp, leafno, newval); |
2466 | if (rc) | 2466 | if (rc) |
2467 | return rc; | 2467 | return rc; |
2468 | } | 2468 | } |
2469 | 2469 | ||
2470 | /* check if the root of the current dmap control page changed due | 2470 | /* check if the root of the current dmap control page changed due |
2471 | * to the update and if the current dmap control page is not at | 2471 | * to the update and if the current dmap control page is not at |
2472 | * the current top level (i.e. L0, L1, L2) of the map. if so (i.e. | 2472 | * the current top level (i.e. L0, L1, L2) of the map. if so (i.e. |
2473 | * root changed and this is not the top level), call this routine | 2473 | * root changed and this is not the top level), call this routine |
2474 | * again (recursion) for the next higher level of the mapping to | 2474 | * again (recursion) for the next higher level of the mapping to |
2475 | * reflect the change in root for the current dmap control page. | 2475 | * reflect the change in root for the current dmap control page. |
2476 | */ | 2476 | */ |
2477 | if (dcp->stree[ROOT] != oldroot) { | 2477 | if (dcp->stree[ROOT] != oldroot) { |
2478 | /* are we below the top level of the map. if so, | 2478 | /* are we below the top level of the map. if so, |
2479 | * bubble the root up to the next higher level. | 2479 | * bubble the root up to the next higher level. |
2480 | */ | 2480 | */ |
2481 | if (level < bmp->db_maxlevel) { | 2481 | if (level < bmp->db_maxlevel) { |
2482 | /* bubble up the new root of this dmap control page to | 2482 | /* bubble up the new root of this dmap control page to |
2483 | * the next level. | 2483 | * the next level. |
2484 | */ | 2484 | */ |
2485 | if ((rc = | 2485 | if ((rc = |
2486 | dbAdjCtl(bmp, blkno, dcp->stree[ROOT], alloc, | 2486 | dbAdjCtl(bmp, blkno, dcp->stree[ROOT], alloc, |
2487 | level + 1))) { | 2487 | level + 1))) { |
2488 | /* something went wrong in bubbling up the new | 2488 | /* something went wrong in bubbling up the new |
2489 | * root value, so backout the changes to the | 2489 | * root value, so backout the changes to the |
2490 | * current dmap control page. | 2490 | * current dmap control page. |
2491 | */ | 2491 | */ |
2492 | if (alloc) { | 2492 | if (alloc) { |
2493 | dbJoin((dmtree_t *) dcp, leafno, | 2493 | dbJoin((dmtree_t *) dcp, leafno, |
2494 | oldval); | 2494 | oldval); |
2495 | } else { | 2495 | } else { |
2496 | /* the dbJoin() above might have | 2496 | /* the dbJoin() above might have |
2497 | * caused a larger binary buddy system | 2497 | * caused a larger binary buddy system |
2498 | * to form and we may now be in the | 2498 | * to form and we may now be in the |
2499 | * middle of it. if this is the case, | 2499 | * middle of it. if this is the case, |
2500 | * back split the buddies. | 2500 | * back split the buddies. |
2501 | */ | 2501 | */ |
2502 | if (dcp->stree[ti] == NOFREE) | 2502 | if (dcp->stree[ti] == NOFREE) |
2503 | dbBackSplit((dmtree_t *) | 2503 | dbBackSplit((dmtree_t *) |
2504 | dcp, leafno); | 2504 | dcp, leafno); |
2505 | dbSplit((dmtree_t *) dcp, leafno, | 2505 | dbSplit((dmtree_t *) dcp, leafno, |
2506 | dcp->budmin, oldval); | 2506 | dcp->budmin, oldval); |
2507 | } | 2507 | } |
2508 | 2508 | ||
2509 | /* release the buffer and return the error. | 2509 | /* release the buffer and return the error. |
2510 | */ | 2510 | */ |
2511 | release_metapage(mp); | 2511 | release_metapage(mp); |
2512 | return (rc); | 2512 | return (rc); |
2513 | } | 2513 | } |
2514 | } else { | 2514 | } else { |
2515 | /* we're at the top level of the map. update | 2515 | /* we're at the top level of the map. update |
2516 | * the bmap control page to reflect the size | 2516 | * the bmap control page to reflect the size |
2517 | * of the maximum free buddy system. | 2517 | * of the maximum free buddy system. |
2518 | */ | 2518 | */ |
2519 | assert(level == bmp->db_maxlevel); | 2519 | assert(level == bmp->db_maxlevel); |
2520 | if (bmp->db_maxfreebud != oldroot) { | 2520 | if (bmp->db_maxfreebud != oldroot) { |
2521 | jfs_error(bmp->db_ipbmap->i_sb, | 2521 | jfs_error(bmp->db_ipbmap->i_sb, |
2522 | "dbAdjCtl: the maximum free buddy is " | 2522 | "dbAdjCtl: the maximum free buddy is " |
2523 | "not the old root"); | 2523 | "not the old root"); |
2524 | } | 2524 | } |
2525 | bmp->db_maxfreebud = dcp->stree[ROOT]; | 2525 | bmp->db_maxfreebud = dcp->stree[ROOT]; |
2526 | } | 2526 | } |
2527 | } | 2527 | } |
2528 | 2528 | ||
2529 | /* write the buffer. | 2529 | /* write the buffer. |
2530 | */ | 2530 | */ |
2531 | write_metapage(mp); | 2531 | write_metapage(mp); |
2532 | 2532 | ||
2533 | return (0); | 2533 | return (0); |
2534 | } | 2534 | } |
2535 | 2535 | ||
2536 | 2536 | ||
2537 | /* | 2537 | /* |
2538 | * NAME: dbSplit() | 2538 | * NAME: dbSplit() |
2539 | * | 2539 | * |
2540 | * FUNCTION: update the leaf of a dmtree with a new value, splitting | 2540 | * FUNCTION: update the leaf of a dmtree with a new value, splitting |
2541 | * the leaf from the binary buddy system of the dmtree's | 2541 | * the leaf from the binary buddy system of the dmtree's |
2542 | * leaves, as required. | 2542 | * leaves, as required. |
2543 | * | 2543 | * |
2544 | * PARAMETERS: | 2544 | * PARAMETERS: |
2545 | * tp - pointer to the tree containing the leaf. | 2545 | * tp - pointer to the tree containing the leaf. |
2546 | * leafno - the number of the leaf to be updated. | 2546 | * leafno - the number of the leaf to be updated. |
2547 | * splitsz - the size the binary buddy system starting at the leaf | 2547 | * splitsz - the size the binary buddy system starting at the leaf |
2548 | * must be split to, specified as the log2 number of blocks. | 2548 | * must be split to, specified as the log2 number of blocks. |
2549 | * newval - the new value for the leaf. | 2549 | * newval - the new value for the leaf. |
2550 | * | 2550 | * |
2551 | * RETURN VALUES: none | 2551 | * RETURN VALUES: none |
2552 | * | 2552 | * |
2553 | * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; | 2553 | * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; |
2554 | */ | 2554 | */ |
2555 | static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval) | 2555 | static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval) |
2556 | { | 2556 | { |
2557 | int budsz; | 2557 | int budsz; |
2558 | int cursz; | 2558 | int cursz; |
2559 | s8 *leaf = tp->dmt_stree + le32_to_cpu(tp->dmt_leafidx); | 2559 | s8 *leaf = tp->dmt_stree + le32_to_cpu(tp->dmt_leafidx); |
2560 | 2560 | ||
2561 | /* check if the leaf needs to be split. | 2561 | /* check if the leaf needs to be split. |
2562 | */ | 2562 | */ |
2563 | if (leaf[leafno] > tp->dmt_budmin) { | 2563 | if (leaf[leafno] > tp->dmt_budmin) { |
2564 | /* the split occurs by cutting the buddy system in half | 2564 | /* the split occurs by cutting the buddy system in half |
2565 | * at the specified leaf until we reach the specified | 2565 | * at the specified leaf until we reach the specified |
2566 | * size. pick up the starting split size (current size | 2566 | * size. pick up the starting split size (current size |
2567 | * - 1 in l2) and the corresponding buddy size. | 2567 | * - 1 in l2) and the corresponding buddy size. |
2568 | */ | 2568 | */ |
2569 | cursz = leaf[leafno] - 1; | 2569 | cursz = leaf[leafno] - 1; |
2570 | budsz = BUDSIZE(cursz, tp->dmt_budmin); | 2570 | budsz = BUDSIZE(cursz, tp->dmt_budmin); |
2571 | 2571 | ||
2572 | /* split until we reach the specified size. | 2572 | /* split until we reach the specified size. |
2573 | */ | 2573 | */ |
2574 | while (cursz >= splitsz) { | 2574 | while (cursz >= splitsz) { |
2575 | /* update the buddy's leaf with its new value. | 2575 | /* update the buddy's leaf with its new value. |
2576 | */ | 2576 | */ |
2577 | dbAdjTree(tp, leafno ^ budsz, cursz); | 2577 | dbAdjTree(tp, leafno ^ budsz, cursz); |
2578 | 2578 | ||
2579 | /* on to the next size and buddy. | 2579 | /* on to the next size and buddy. |
2580 | */ | 2580 | */ |
2581 | cursz -= 1; | 2581 | cursz -= 1; |
2582 | budsz >>= 1; | 2582 | budsz >>= 1; |
2583 | } | 2583 | } |
2584 | } | 2584 | } |
2585 | 2585 | ||
2586 | /* adjust the dmap tree to reflect the specified leaf's new | 2586 | /* adjust the dmap tree to reflect the specified leaf's new |
2587 | * value. | 2587 | * value. |
2588 | */ | 2588 | */ |
2589 | dbAdjTree(tp, leafno, newval); | 2589 | dbAdjTree(tp, leafno, newval); |
2590 | } | 2590 | } |
2591 | 2591 | ||
2592 | 2592 | ||
2593 | /* | 2593 | /* |
2594 | * NAME: dbBackSplit() | 2594 | * NAME: dbBackSplit() |
2595 | * | 2595 | * |
2596 | * FUNCTION: back split the binary buddy system of dmtree leaves | 2596 | * FUNCTION: back split the binary buddy system of dmtree leaves |
2597 | * that hold a specified leaf until the specified leaf | 2597 | * that hold a specified leaf until the specified leaf |
2598 | * starts its own binary buddy system. | 2598 | * starts its own binary buddy system. |
2599 | * | 2599 | * |
2600 | * the allocators typically perform allocations at the start | 2600 | * the allocators typically perform allocations at the start |
2601 | * of binary buddy systems and dbSplit() is used to accomplish | 2601 | * of binary buddy systems and dbSplit() is used to accomplish |
2602 | * any required splits. in some cases, however, allocation | 2602 | * any required splits. in some cases, however, allocation |
2603 | * may occur in the middle of a binary system and requires a | 2603 | * may occur in the middle of a binary system and requires a |
2604 | * back split, with the split proceeding out from the middle of | 2604 | * back split, with the split proceeding out from the middle of |
2605 | * the system (less efficient) rather than the start of the | 2605 | * the system (less efficient) rather than the start of the |
2606 | * system (more efficient). the cases in which a back split | 2606 | * system (more efficient). the cases in which a back split |
2607 | * is required are rare and are limited to the first allocation | 2607 | * is required are rare and are limited to the first allocation |
2608 | * within an allocation group which is a part (not first part) | 2608 | * within an allocation group which is a part (not first part) |
2609 | * of a larger binary buddy system and a few exception cases | 2609 | * of a larger binary buddy system and a few exception cases |
2610 | * in which a previous join operation must be backed out. | 2610 | * in which a previous join operation must be backed out. |
2611 | * | 2611 | * |
2612 | * PARAMETERS: | 2612 | * PARAMETERS: |
2613 | * tp - pointer to the tree containing the leaf. | 2613 | * tp - pointer to the tree containing the leaf. |
2614 | * leafno - the number of the leaf to be updated. | 2614 | * leafno - the number of the leaf to be updated. |
2615 | * | 2615 | * |
2616 | * RETURN VALUES: none | 2616 | * RETURN VALUES: none |
2617 | * | 2617 | * |
2618 | * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; | 2618 | * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; |
2619 | */ | 2619 | */ |
static int dbBackSplit(dmtree_t * tp, int leafno)
{
	int budsz, bud, w, bsz, size;
	int cursz;
	/* leaf array starts at dmt_leafidx within the tree's stree. */
	s8 *leaf = tp->dmt_stree + le32_to_cpu(tp->dmt_leafidx);

	/* leaf should be part (not first part) of a binary
	 * buddy system.
	 */
	assert(leaf[leafno] == NOFREE);

	/* the back split is accomplished by iteratively finding the leaf
	 * that starts the buddy system that contains the specified leaf and
	 * splitting that system in two.  this iteration continues until
	 * the specified leaf becomes the start of a buddy system.
	 *
	 * determine maximum possible l2 size for the specified leaf.
	 */
	size =
	    LITOL2BSZ(leafno, le32_to_cpu(tp->dmt_l2nleafs),
		      tp->dmt_budmin);

	/* determine the number of leaves covered by this size.  this
	 * is the buddy size that we will start with as we search for
	 * the buddy system that contains the specified leaf.
	 */
	budsz = BUDSIZE(size, tp->dmt_budmin);

	/* back split.  loop until the specified leaf heads its own
	 * buddy system (i.e. is no longer NOFREE).
	 */
	while (leaf[leafno] == NOFREE) {
		/* find the leftmost buddy leaf.  each pass doubles the
		 * buddy distance and keeps the lower of (w, bud) as the
		 * candidate start of the enclosing system.
		 */
		for (w = leafno, bsz = budsz;; bsz <<= 1,
		     w = (w < bud) ? w : bud) {
			/* sanity check: the buddy distance must stay
			 * within the tree, else the map is corrupt.
			 */
			if (bsz >= le32_to_cpu(tp->dmt_nleafs)) {
				jfs_err("JFS: block map error in dbBackSplit");
				return -EIO;
			}

			/* determine the buddy.
			 */
			bud = w ^ bsz;

			/* check if this buddy is the start of the system.
			 */
			if (leaf[bud] != NOFREE) {
				/* split the leaf at the start of the
				 * system in two.
				 */
				cursz = leaf[bud] - 1;
				dbSplit(tp, bud, cursz, cursz);
				break;
			}
		}
	}

	/* after the back split the leaf must describe exactly its own
	 * maximum buddy size; anything else indicates corruption.
	 */
	if (leaf[leafno] != size) {
		jfs_err("JFS: wrong leaf value in dbBackSplit");
		return -EIO;
	}
	return 0;
}
2683 | 2683 | ||
2684 | 2684 | ||
/*
 * NAME:	dbJoin()
 *
 * FUNCTION:	update the leaf of a dmtree with a new value, joining
 *		the leaf with other leaves of the dmtree into a multi-leaf
 *		binary buddy system, as required.
 *
 * PARAMETERS:
 *	tp	- pointer to the tree containing the leaf.
 *	leafno	- the number of the leaf to be updated.
 *	newval	- the new value for the leaf.
 *
 * RETURN VALUES:
 *	0	- success
 *	-EIO	- a buddy leaf held a value smaller than the size being
 *		  joined, which cannot happen on a consistent map.
 */
static int dbJoin(dmtree_t * tp, int leafno, int newval)
{
	int budsz, buddy;
	s8 *leaf;

	/* can the new leaf value require a join with other leaves ?
	 * (a value below dmt_budmin describes less than a minimum
	 * buddy system and never participates in joins.)
	 */
	if (newval >= tp->dmt_budmin) {
		/* pickup a pointer to the leaves of the tree.
		 */
		leaf = tp->dmt_stree + le32_to_cpu(tp->dmt_leafidx);

		/* try to join the specified leaf into a large binary
		 * buddy system.  the join proceeds by attempting to join
		 * the specified leafno with its buddy (leaf) at new value.
		 * if the join occurs, we attempt to join the left leaf
		 * of the joined buddies with its buddy at new value + 1.
		 * we continue to join until we find a buddy that cannot be
		 * joined (does not have a value equal to the size of the
		 * last join) or until all leaves have been joined into a
		 * single system.
		 *
		 * get the buddy size (number of words covered) of
		 * the new value.
		 */
		budsz = BUDSIZE(newval, tp->dmt_budmin);

		/* try to join.
		 */
		while (budsz < le32_to_cpu(tp->dmt_nleafs)) {
			/* get the buddy leaf.  buddies differ only in
			 * the bit selecting which half of the doubled
			 * system they occupy.
			 */
			buddy = leafno ^ budsz;

			/* if the leaf's new value is greater than its
			 * buddy's value, we join no more.
			 */
			if (newval > leaf[buddy])
				break;

			/* It shouldn't be less */
			if (newval < leaf[buddy])
				return -EIO;

			/* check which (leafno or buddy) is the left buddy.
			 * the left buddy gets to claim the blocks resulting
			 * from the join while the right gets to claim none.
			 * the left buddy is also eligible to participate in
			 * a join at the next higher level while the right
			 * is not.
			 *
			 */
			if (leafno < buddy) {
				/* leafno is the left buddy.
				 */
				dbAdjTree(tp, buddy, NOFREE);
			} else {
				/* buddy is the left buddy and becomes
				 * leafno.
				 */
				dbAdjTree(tp, leafno, NOFREE);
				leafno = buddy;
			}

			/* on to try the next join.
			 */
			newval += 1;
			budsz <<= 1;
		}
	}

	/* update the leaf value.
	 */
	dbAdjTree(tp, leafno, newval);

	return 0;
}
2776 | 2776 | ||
2777 | 2777 | ||
2778 | /* | 2778 | /* |
2779 | * NAME: dbAdjTree() | 2779 | * NAME: dbAdjTree() |
2780 | * | 2780 | * |
2781 | * FUNCTION: update a leaf of a dmtree with a new value, adjusting | 2781 | * FUNCTION: update a leaf of a dmtree with a new value, adjusting |
2782 | * the dmtree, as required, to reflect the new leaf value. | 2782 | * the dmtree, as required, to reflect the new leaf value. |
2783 | * the combination of any buddies must already be done before | 2783 | * the combination of any buddies must already be done before |
2784 | * this is called. | 2784 | * this is called. |
2785 | * | 2785 | * |
2786 | * PARAMETERS: | 2786 | * PARAMETERS: |
2787 | * tp - pointer to the tree to be adjusted. | 2787 | * tp - pointer to the tree to be adjusted. |
2788 | * leafno - the number of the leaf to be updated. | 2788 | * leafno - the number of the leaf to be updated. |
2789 | * newval - the new value for the leaf. | 2789 | * newval - the new value for the leaf. |
2790 | * | 2790 | * |
2791 | * RETURN VALUES: none | 2791 | * RETURN VALUES: none |
2792 | */ | 2792 | */ |
2793 | static void dbAdjTree(dmtree_t * tp, int leafno, int newval) | 2793 | static void dbAdjTree(dmtree_t * tp, int leafno, int newval) |
2794 | { | 2794 | { |
2795 | int lp, pp, k; | 2795 | int lp, pp, k; |
2796 | int max; | 2796 | int max; |
2797 | 2797 | ||
2798 | /* pick up the index of the leaf for this leafno. | 2798 | /* pick up the index of the leaf for this leafno. |
2799 | */ | 2799 | */ |
2800 | lp = leafno + le32_to_cpu(tp->dmt_leafidx); | 2800 | lp = leafno + le32_to_cpu(tp->dmt_leafidx); |
2801 | 2801 | ||
2802 | /* is the current value the same as the old value ? if so, | 2802 | /* is the current value the same as the old value ? if so, |
2803 | * there is nothing to do. | 2803 | * there is nothing to do. |
2804 | */ | 2804 | */ |
2805 | if (tp->dmt_stree[lp] == newval) | 2805 | if (tp->dmt_stree[lp] == newval) |
2806 | return; | 2806 | return; |
2807 | 2807 | ||
2808 | /* set the new value. | 2808 | /* set the new value. |
2809 | */ | 2809 | */ |
2810 | tp->dmt_stree[lp] = newval; | 2810 | tp->dmt_stree[lp] = newval; |
2811 | 2811 | ||
2812 | /* bubble the new value up the tree as required. | 2812 | /* bubble the new value up the tree as required. |
2813 | */ | 2813 | */ |
2814 | for (k = 0; k < le32_to_cpu(tp->dmt_height); k++) { | 2814 | for (k = 0; k < le32_to_cpu(tp->dmt_height); k++) { |
2815 | /* get the index of the first leaf of the 4 leaf | 2815 | /* get the index of the first leaf of the 4 leaf |
2816 | * group containing the specified leaf (leafno). | 2816 | * group containing the specified leaf (leafno). |
2817 | */ | 2817 | */ |
2818 | lp = ((lp - 1) & ~0x03) + 1; | 2818 | lp = ((lp - 1) & ~0x03) + 1; |
2819 | 2819 | ||
2820 | /* get the index of the parent of this 4 leaf group. | 2820 | /* get the index of the parent of this 4 leaf group. |
2821 | */ | 2821 | */ |
2822 | pp = (lp - 1) >> 2; | 2822 | pp = (lp - 1) >> 2; |
2823 | 2823 | ||
2824 | /* determine the maximum of the 4 leaves. | 2824 | /* determine the maximum of the 4 leaves. |
2825 | */ | 2825 | */ |
2826 | max = TREEMAX(&tp->dmt_stree[lp]); | 2826 | max = TREEMAX(&tp->dmt_stree[lp]); |
2827 | 2827 | ||
2828 | /* if the maximum of the 4 is the same as the | 2828 | /* if the maximum of the 4 is the same as the |
2829 | * parent's value, we're done. | 2829 | * parent's value, we're done. |
2830 | */ | 2830 | */ |
2831 | if (tp->dmt_stree[pp] == max) | 2831 | if (tp->dmt_stree[pp] == max) |
2832 | break; | 2832 | break; |
2833 | 2833 | ||
2834 | /* parent gets new value. | 2834 | /* parent gets new value. |
2835 | */ | 2835 | */ |
2836 | tp->dmt_stree[pp] = max; | 2836 | tp->dmt_stree[pp] = max; |
2837 | 2837 | ||
2838 | /* parent becomes leaf for next go-round. | 2838 | /* parent becomes leaf for next go-round. |
2839 | */ | 2839 | */ |
2840 | lp = pp; | 2840 | lp = pp; |
2841 | } | 2841 | } |
2842 | } | 2842 | } |
2843 | 2843 | ||
2844 | 2844 | ||
/*
 * NAME:	dbFindLeaf()
 *
 * FUNCTION:	search a dmtree_t for sufficient free blocks, returning
 *		the index of a leaf describing the free blocks if
 *		sufficient free blocks are found.
 *
 *		the search starts at the top of the dmtree_t tree and
 *		proceeds down the tree to the leftmost leaf with sufficient
 *		free space.
 *
 * PARAMETERS:
 *	tp	- pointer to the tree to be searched.
 *	l2nb	- log2 number of free blocks to search for.
 *	leafidx	- return pointer to be set to the index of the leaf
 *		  describing at least l2nb free blocks if sufficient
 *		  free blocks are found.
 *
 * RETURN VALUES:
 *	0	- success
 *	-ENOSPC	- insufficient free blocks.
 */
static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx)
{
	int ti, n = 0, k, x = 0;

	/* first check the root of the tree to see if there is
	 * sufficient free space.  the root holds the maximum free
	 * string of the whole tree, so failure here is definitive.
	 */
	if (l2nb > tp->dmt_stree[ROOT])
		return -ENOSPC;

	/* sufficient free space available. now search down the tree
	 * starting at the next level for the leftmost leaf that
	 * describes sufficient free space.  each node's 4 children
	 * live at indices ((node + offset) << 2) + 1 onward.
	 */
	for (k = le32_to_cpu(tp->dmt_height), ti = 1;
	     k > 0; k--, ti = ((ti + n) << 2) + 1) {
		/* search the four nodes at this level, starting from
		 * the left.
		 */
		for (x = ti, n = 0; n < 4; n++) {
			/* sufficient free space found. move to the next
			 * level (or quit if this is the last level).
			 */
			if (l2nb <= tp->dmt_stree[x + n])
				break;
		}

		/* better have found something since the higher
		 * levels of the tree said it was here.
		 * NOTE(review): n == 4 would mean a parent's max
		 * disagreed with its children (corrupt map); this
		 * assert is the only guard -- confirm whether a
		 * graceful error return would be preferable.
		 */
		assert(n < 4);
	}

	/* set the return to the leftmost leaf describing sufficient
	 * free space.  (x + n indexes the full tree array; subtracting
	 * dmt_leafidx converts it to a leaf number.)
	 */
	*leafidx = x + n - le32_to_cpu(tp->dmt_leafidx);

	return (0);
}
2907 | 2907 | ||
2908 | 2908 | ||
2909 | /* | 2909 | /* |
2910 | * NAME: dbFindBits() | 2910 | * NAME: dbFindBits() |
2911 | * | 2911 | * |
2912 | * FUNCTION: find a specified number of binary buddy free bits within a | 2912 | * FUNCTION: find a specified number of binary buddy free bits within a |
2913 | * dmap bitmap word value. | 2913 | * dmap bitmap word value. |
2914 | * | 2914 | * |
2915 | * this routine searches the bitmap value for (1 << l2nb) free | 2915 | * this routine searches the bitmap value for (1 << l2nb) free |
2916 | * bits at (1 << l2nb) alignments within the value. | 2916 | * bits at (1 << l2nb) alignments within the value. |
2917 | * | 2917 | * |
2918 | * PARAMETERS: | 2918 | * PARAMETERS: |
2919 | * word - dmap bitmap word value. | 2919 | * word - dmap bitmap word value. |
2920 | * l2nb - number of free bits specified as a log2 number. | 2920 | * l2nb - number of free bits specified as a log2 number. |
2921 | * | 2921 | * |
2922 | * RETURN VALUES: | 2922 | * RETURN VALUES: |
2923 | * starting bit number of free bits. | 2923 | * starting bit number of free bits. |
2924 | */ | 2924 | */ |
2925 | static int dbFindBits(u32 word, int l2nb) | 2925 | static int dbFindBits(u32 word, int l2nb) |
2926 | { | 2926 | { |
2927 | int bitno, nb; | 2927 | int bitno, nb; |
2928 | u32 mask; | 2928 | u32 mask; |
2929 | 2929 | ||
2930 | /* get the number of bits. | 2930 | /* get the number of bits. |
2931 | */ | 2931 | */ |
2932 | nb = 1 << l2nb; | 2932 | nb = 1 << l2nb; |
2933 | assert(nb <= DBWORD); | 2933 | assert(nb <= DBWORD); |
2934 | 2934 | ||
2935 | /* complement the word so we can use a mask (i.e. 0s represent | 2935 | /* complement the word so we can use a mask (i.e. 0s represent |
2936 | * free bits) and compute the mask. | 2936 | * free bits) and compute the mask. |
2937 | */ | 2937 | */ |
2938 | word = ~word; | 2938 | word = ~word; |
2939 | mask = ONES << (DBWORD - nb); | 2939 | mask = ONES << (DBWORD - nb); |
2940 | 2940 | ||
2941 | /* scan the word for nb free bits at nb alignments. | 2941 | /* scan the word for nb free bits at nb alignments. |
2942 | */ | 2942 | */ |
2943 | for (bitno = 0; mask != 0; bitno += nb, mask >>= nb) { | 2943 | for (bitno = 0; mask != 0; bitno += nb, mask >>= nb) { |
2944 | if ((mask & word) == mask) | 2944 | if ((mask & word) == mask) |
2945 | break; | 2945 | break; |
2946 | } | 2946 | } |
2947 | 2947 | ||
2948 | ASSERT(bitno < 32); | 2948 | ASSERT(bitno < 32); |
2949 | 2949 | ||
2950 | /* return the bit number. | 2950 | /* return the bit number. |
2951 | */ | 2951 | */ |
2952 | return (bitno); | 2952 | return (bitno); |
2953 | } | 2953 | } |
2954 | 2954 | ||
2955 | 2955 | ||
/*
 * NAME:	dbMaxBud(u8 *cp)
 *
 * FUNCTION:	determine the largest binary buddy string of free
 *		bits within 32-bits of the map.
 *
 * PARAMETERS:
 *	cp	- pointer to the 32-bit value.
 *
 * RETURN VALUES:
 *	largest binary buddy of free bits within a dmap word.
 */
static int dbMaxBud(u8 * cp)
{
	signed char tmp1, tmp2;

	/* check if the wmap word is all free. if so, the
	 * free buddy size is BUDMIN.
	 * NOTE(review): the u8 buffer is read through uint/u16
	 * pointer casts; the zero comparisons are endian-neutral,
	 * but this relies on 4-byte alignment of cp and on
	 * type-punning being tolerated (kernel builds use
	 * -fno-strict-aliasing) -- confirm before reusing elsewhere.
	 */
	if (*((uint *) cp) == 0)
		return (BUDMIN);

	/* check if the wmap word is half free. if so, the
	 * free buddy size is BUDMIN-1.
	 */
	if (*((u16 *) cp) == 0 || *((u16 *) cp + 1) == 0)
		return (BUDMIN - 1);

	/* not all free or half free. determine the free buddy
	 * size thru table lookup using quarters of the wmap word.
	 */
	tmp1 = max(budtab[cp[2]], budtab[cp[3]]);
	tmp2 = max(budtab[cp[0]], budtab[cp[1]]);
	return (max(tmp1, tmp2));
}
2991 | 2991 | ||
2992 | 2992 | ||
2993 | /* | 2993 | /* |
2994 | * NAME: cnttz(uint word) | 2994 | * NAME: cnttz(uint word) |
2995 | * | 2995 | * |
2996 | * FUNCTION: determine the number of trailing zeros within a 32-bit | 2996 | * FUNCTION: determine the number of trailing zeros within a 32-bit |
2997 | * value. | 2997 | * value. |
2998 | * | 2998 | * |
2999 | * PARAMETERS: | 2999 | * PARAMETERS: |
3000 | * value - 32-bit value to be examined. | 3000 | * value - 32-bit value to be examined. |
3001 | * | 3001 | * |
3002 | * RETURN VALUES: | 3002 | * RETURN VALUES: |
3003 | * count of trailing zeros | 3003 | * count of trailing zeros |
3004 | */ | 3004 | */ |
3005 | static int cnttz(u32 word) | 3005 | static int cnttz(u32 word) |
3006 | { | 3006 | { |
3007 | int n; | 3007 | int n; |
3008 | 3008 | ||
3009 | for (n = 0; n < 32; n++, word >>= 1) { | 3009 | for (n = 0; n < 32; n++, word >>= 1) { |
3010 | if (word & 0x01) | 3010 | if (word & 0x01) |
3011 | break; | 3011 | break; |
3012 | } | 3012 | } |
3013 | 3013 | ||
3014 | return (n); | 3014 | return (n); |
3015 | } | 3015 | } |
3016 | 3016 | ||
3017 | 3017 | ||
3018 | /* | 3018 | /* |
3019 | * NAME: cntlz(u32 value) | 3019 | * NAME: cntlz(u32 value) |
3020 | * | 3020 | * |
3021 | * FUNCTION: determine the number of leading zeros within a 32-bit | 3021 | * FUNCTION: determine the number of leading zeros within a 32-bit |
3022 | * value. | 3022 | * value. |
3023 | * | 3023 | * |
3024 | * PARAMETERS: | 3024 | * PARAMETERS: |
3025 | * value - 32-bit value to be examined. | 3025 | * value - 32-bit value to be examined. |
3026 | * | 3026 | * |
3027 | * RETURN VALUES: | 3027 | * RETURN VALUES: |
3028 | * count of leading zeros | 3028 | * count of leading zeros |
3029 | */ | 3029 | */ |
3030 | static int cntlz(u32 value) | 3030 | static int cntlz(u32 value) |
3031 | { | 3031 | { |
3032 | int n; | 3032 | int n; |
3033 | 3033 | ||
3034 | for (n = 0; n < 32; n++, value <<= 1) { | 3034 | for (n = 0; n < 32; n++, value <<= 1) { |
3035 | if (value & HIGHORDER) | 3035 | if (value & HIGHORDER) |
3036 | break; | 3036 | break; |
3037 | } | 3037 | } |
3038 | return (n); | 3038 | return (n); |
3039 | } | 3039 | } |
3040 | 3040 | ||
3041 | 3041 | ||
/*
 * NAME:	blkstol2(s64 nb)
 *
 * FUNCTION:	convert a block count to its log2 value. if the block
 *		count is not a l2 multiple, it is rounded up to the next
 *		larger l2 multiple.
 *
 * PARAMETERS:
 *	nb	- number of blocks
 *
 * RETURN VALUES:
 *	log2 number of blocks
 */
static int blkstol2(s64 nb)
{
	int l2nb;
	s64 mask;		/* meant to be signed */

	/* start with only the sign bit set.  because mask is signed,
	 * each right shift below drags the sign bit along, so after k
	 * shifts mask covers the top k+1 bits of the word.
	 * NOTE(review): right-shifting a negative value is
	 * implementation-defined in C; this code relies on arithmetic
	 * shift, which holds for compilers the kernel supports.
	 */
	mask = (s64) 1 << (64 - 1);

	/* count the leading bits.
	 */
	for (l2nb = 0; l2nb < 64; l2nb++, mask >>= 1) {
		/* leading bit found: the highest set bit of nb is the
		 * bit mask grew to cover on this iteration.
		 */
		if (nb & mask) {
			/* determine the l2 value.
			 */
			l2nb = (64 - 1) - l2nb;

			/* check if we need to round up: any bit set
			 * below the leading bit means nb is not an
			 * exact power of two.
			 */
			if (~mask & nb)
				l2nb++;

			return (l2nb);
		}
	}
	assert(0);
	return 0;		/* fix compiler warning */
}
3083 | 3083 | ||
3084 | 3084 | ||
/*
 * NAME:	dbAllocBottomUp()
 *
 * FUNCTION:	alloc the specified block range from the working block
 *		allocation map.
 *
 *		the blocks will be alloc from the working map one dmap
 *		at a time.
 *
 * PARAMETERS:
 *	ip	- pointer to in-core inode;
 *	blkno	- starting block number to be allocated.
 *	nblocks	- number of blocks to be allocated.
 *
 * RETURN VALUES:
 *	0	- success
 *	-EIO	- i/o error
 */
int dbAllocBottomUp(struct inode *ip, s64 blkno, s64 nblocks)
{
	struct metapage *mp;
	struct dmap *dp;
	int nb, rc;
	s64 lblkno, rem;
	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
	struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap;

	/* hold the bmap lock across the whole multi-dmap update; it
	 * is released on every exit path below.
	 */
	IREAD_LOCK(ipbmap, RDWRLOCK_DMAP);

	/* block to be allocated better be within the mapsize. */
	ASSERT(nblocks <= bmp->db_mapsize - blkno);

	/*
	 * allocate the blocks a dmap at a time.
	 * NOTE(review): assumes nblocks > 0; with nblocks == 0 the
	 * loop never runs and write_metapage() below would be handed
	 * the initial NULL mp -- confirm callers never pass 0.
	 */
	mp = NULL;
	for (rem = nblocks; rem > 0; rem -= nb, blkno += nb) {
		/* release previous dmap if any */
		if (mp) {
			write_metapage(mp);
		}

		/* get the buffer for the current dmap. */
		lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage);
		mp = read_metapage(ipbmap, lblkno, PSIZE, 0);
		if (mp == NULL) {
			IREAD_UNLOCK(ipbmap);
			return -EIO;
		}
		dp = (struct dmap *) mp->data;

		/* determine the number of blocks to be allocated from
		 * this dmap: the remainder, capped at the distance to
		 * the end of the current dmap.
		 */
		nb = min(rem, BPERDMAP - (blkno & (BPERDMAP - 1)));

		/* allocate the blocks. */
		if ((rc = dbAllocDmapBU(bmp, dp, blkno, nb))) {
			release_metapage(mp);
			IREAD_UNLOCK(ipbmap);
			return (rc);
		}
	}

	/* write the last buffer. */
	write_metapage(mp);

	IREAD_UNLOCK(ipbmap);

	return (0);
}
3156 | 3156 | ||
3157 | 3157 | ||
3158 | static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno, | 3158 | static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno, |
3159 | int nblocks) | 3159 | int nblocks) |
3160 | { | 3160 | { |
3161 | int rc; | 3161 | int rc; |
3162 | int dbitno, word, rembits, nb, nwords, wbitno, agno; | 3162 | int dbitno, word, rembits, nb, nwords, wbitno, agno; |
3163 | s8 oldroot, *leaf; | 3163 | s8 oldroot, *leaf; |
3164 | struct dmaptree *tp = (struct dmaptree *) & dp->tree; | 3164 | struct dmaptree *tp = (struct dmaptree *) & dp->tree; |
3165 | 3165 | ||
3166 | /* save the current value of the root (i.e. maximum free string) | 3166 | /* save the current value of the root (i.e. maximum free string) |
3167 | * of the dmap tree. | 3167 | * of the dmap tree. |
3168 | */ | 3168 | */ |
3169 | oldroot = tp->stree[ROOT]; | 3169 | oldroot = tp->stree[ROOT]; |
3170 | 3170 | ||
3171 | /* pick up a pointer to the leaves of the dmap tree */ | 3171 | /* pick up a pointer to the leaves of the dmap tree */ |
3172 | leaf = tp->stree + LEAFIND; | 3172 | leaf = tp->stree + LEAFIND; |
3173 | 3173 | ||
3174 | /* determine the bit number and word within the dmap of the | 3174 | /* determine the bit number and word within the dmap of the |
3175 | * starting block. | 3175 | * starting block. |
3176 | */ | 3176 | */ |
3177 | dbitno = blkno & (BPERDMAP - 1); | 3177 | dbitno = blkno & (BPERDMAP - 1); |
3178 | word = dbitno >> L2DBWORD; | 3178 | word = dbitno >> L2DBWORD; |
3179 | 3179 | ||
3180 | /* block range better be within the dmap */ | 3180 | /* block range better be within the dmap */ |
3181 | assert(dbitno + nblocks <= BPERDMAP); | 3181 | assert(dbitno + nblocks <= BPERDMAP); |
3182 | 3182 | ||
3183 | /* allocate the bits of the dmap's words corresponding to the block | 3183 | /* allocate the bits of the dmap's words corresponding to the block |
3184 | * range. not all bits of the first and last words may be contained | 3184 | * range. not all bits of the first and last words may be contained |
3185 | * within the block range. if this is the case, we'll work against | 3185 | * within the block range. if this is the case, we'll work against |
3186 | * those words (i.e. partial first and/or last) on an individual basis | 3186 | * those words (i.e. partial first and/or last) on an individual basis |
3187 | * (a single pass), allocating the bits of interest by hand and | 3187 | * (a single pass), allocating the bits of interest by hand and |
3188 | * updating the leaf corresponding to the dmap word. a single pass | 3188 | * updating the leaf corresponding to the dmap word. a single pass |
3189 | * will be used for all dmap words fully contained within the | 3189 | * will be used for all dmap words fully contained within the |
3190 | * specified range. within this pass, the bits of all fully contained | 3190 | * specified range. within this pass, the bits of all fully contained |
3191 | * dmap words will be marked as free in a single shot and the leaves | 3191 | * dmap words will be marked as free in a single shot and the leaves |
3192 | * will be updated. a single leaf may describe the free space of | 3192 | * will be updated. a single leaf may describe the free space of |
3193 | * multiple dmap words, so we may update only a subset of the actual | 3193 | * multiple dmap words, so we may update only a subset of the actual |
3194 | * leaves corresponding to the dmap words of the block range. | 3194 | * leaves corresponding to the dmap words of the block range. |
3195 | */ | 3195 | */ |
3196 | for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) { | 3196 | for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) { |
3197 | /* determine the bit number within the word and | 3197 | /* determine the bit number within the word and |
3198 | * the number of bits within the word. | 3198 | * the number of bits within the word. |
3199 | */ | 3199 | */ |
3200 | wbitno = dbitno & (DBWORD - 1); | 3200 | wbitno = dbitno & (DBWORD - 1); |
3201 | nb = min(rembits, DBWORD - wbitno); | 3201 | nb = min(rembits, DBWORD - wbitno); |
3202 | 3202 | ||
3203 | /* check if only part of a word is to be allocated. | 3203 | /* check if only part of a word is to be allocated. |
3204 | */ | 3204 | */ |
3205 | if (nb < DBWORD) { | 3205 | if (nb < DBWORD) { |
3206 | /* allocate (set to 1) the appropriate bits within | 3206 | /* allocate (set to 1) the appropriate bits within |
3207 | * this dmap word. | 3207 | * this dmap word. |
3208 | */ | 3208 | */ |
3209 | dp->wmap[word] |= cpu_to_le32(ONES << (DBWORD - nb) | 3209 | dp->wmap[word] |= cpu_to_le32(ONES << (DBWORD - nb) |
3210 | >> wbitno); | 3210 | >> wbitno); |
3211 | 3211 | ||
3212 | word++; | 3212 | word++; |
3213 | } else { | 3213 | } else { |
3214 | /* one or more dmap words are fully contained | 3214 | /* one or more dmap words are fully contained |
3215 | * within the block range. determine how many | 3215 | * within the block range. determine how many |
3216 | * words and allocate (set to 1) the bits of these | 3216 | * words and allocate (set to 1) the bits of these |
3217 | * words. | 3217 | * words. |
3218 | */ | 3218 | */ |
3219 | nwords = rembits >> L2DBWORD; | 3219 | nwords = rembits >> L2DBWORD; |
3220 | memset(&dp->wmap[word], (int) ONES, nwords * 4); | 3220 | memset(&dp->wmap[word], (int) ONES, nwords * 4); |
3221 | 3221 | ||
3222 | /* determine how many bits */ | 3222 | /* determine how many bits */ |
3223 | nb = nwords << L2DBWORD; | 3223 | nb = nwords << L2DBWORD; |
3224 | word += nwords; | 3224 | word += nwords; |
3225 | } | 3225 | } |
3226 | } | 3226 | } |
3227 | 3227 | ||
3228 | /* update the free count for this dmap */ | 3228 | /* update the free count for this dmap */ |
3229 | dp->nfree = cpu_to_le32(le32_to_cpu(dp->nfree) - nblocks); | 3229 | le32_add_cpu(&dp->nfree, -nblocks); |
3230 | 3230 | ||
3231 | /* reconstruct summary tree */ | 3231 | /* reconstruct summary tree */ |
3232 | dbInitDmapTree(dp); | 3232 | dbInitDmapTree(dp); |
3233 | 3233 | ||
3234 | BMAP_LOCK(bmp); | 3234 | BMAP_LOCK(bmp); |
3235 | 3235 | ||
3236 | /* if this allocation group is completely free, | 3236 | /* if this allocation group is completely free, |
3237 | * update the highest active allocation group number | 3237 | * update the highest active allocation group number |
3238 | * if this allocation group is the new max. | 3238 | * if this allocation group is the new max. |
3239 | */ | 3239 | */ |
3240 | agno = blkno >> bmp->db_agl2size; | 3240 | agno = blkno >> bmp->db_agl2size; |
3241 | if (agno > bmp->db_maxag) | 3241 | if (agno > bmp->db_maxag) |
3242 | bmp->db_maxag = agno; | 3242 | bmp->db_maxag = agno; |
3243 | 3243 | ||
3244 | /* update the free count for the allocation group and map */ | 3244 | /* update the free count for the allocation group and map */ |
3245 | bmp->db_agfree[agno] -= nblocks; | 3245 | bmp->db_agfree[agno] -= nblocks; |
3246 | bmp->db_nfree -= nblocks; | 3246 | bmp->db_nfree -= nblocks; |
3247 | 3247 | ||
3248 | BMAP_UNLOCK(bmp); | 3248 | BMAP_UNLOCK(bmp); |
3249 | 3249 | ||
3250 | /* if the root has not changed, done. */ | 3250 | /* if the root has not changed, done. */ |
3251 | if (tp->stree[ROOT] == oldroot) | 3251 | if (tp->stree[ROOT] == oldroot) |
3252 | return (0); | 3252 | return (0); |
3253 | 3253 | ||
3254 | /* root changed. bubble the change up to the dmap control pages. | 3254 | /* root changed. bubble the change up to the dmap control pages. |
3255 | * if the adjustment of the upper level control pages fails, | 3255 | * if the adjustment of the upper level control pages fails, |
3256 | * backout the bit allocation (thus making everything consistent). | 3256 | * backout the bit allocation (thus making everything consistent). |
3257 | */ | 3257 | */ |
3258 | if ((rc = dbAdjCtl(bmp, blkno, tp->stree[ROOT], 1, 0))) | 3258 | if ((rc = dbAdjCtl(bmp, blkno, tp->stree[ROOT], 1, 0))) |
3259 | dbFreeBits(bmp, dp, blkno, nblocks); | 3259 | dbFreeBits(bmp, dp, blkno, nblocks); |
3260 | 3260 | ||
3261 | return (rc); | 3261 | return (rc); |
3262 | } | 3262 | } |
3263 | 3263 | ||
3264 | 3264 | ||
/*
 * NAME:	dbExtendFS()
 *
 * FUNCTION:	extend bmap from blkno for nblocks;
 *		dbExtendFS() updates bmap ready for dbAllocBottomUp();
 *
 *		The map is a three-level tree of control pages over dmaps:
 *
 * L2
 *  |
 *   L1---------------------------------L1
 *    |                                  |
 *     L0---------L0---------L0           L0---------L0---------L0
 *      |          |          |            |          |          |
 *       d0,...,dn  d0,...,dn  d0,...,dn    d0,...,dn  d0,...,dn  d0,.,dm;
 * L2L1L0d0,...,dnL0d0,...,dnL0d0,...,dnL1L0d0,...,dnL0d0,...,dnL0d0,..dm
 *
 * <---old---><----------------------------extend----------------------->
 *
 * PARAMETERS:
 *	ipbmap	- pointer to the block map inode
 *	blkno	- first block of the extension
 *	nblocks	- number of blocks being added to the map
 *
 * RETURNS:	0 on success; -EIO if a control/dmap metapage cannot be
 *		read (partially updated pages are released via errout)
 */
int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks)
{
	struct jfs_sb_info *sbi = JFS_SBI(ipbmap->i_sb);
	int nbperpage = sbi->nbperpage;
	/* i0/j0 latch "first page at this level": the first L0/L1 page
	 * already exists on disk and is read; subsequent pages are fresh
	 * (get_metapage) and initialized from scratch.
	 */
	int i, i0 = true, j, j0 = true, k, n;
	s64 newsize;
	s64 p;
	struct metapage *mp, *l2mp, *l1mp = NULL, *l0mp = NULL;
	struct dmapctl *l2dcp, *l1dcp, *l0dcp;
	struct dmap *dp;
	s8 *l0leaf, *l1leaf, *l2leaf;
	struct bmap *bmp = sbi->bmap;
	int agno, l2agsize, oldl2agsize;
	s64 ag_rem;

	newsize = blkno + nblocks;

	jfs_info("dbExtendFS: blkno:%Ld nblocks:%Ld newsize:%Ld",
		 (long long) blkno, (long long) nblocks, (long long) newsize);

	/*
	 *	initialize bmap control page.
	 *
	 * all the data in bmap control page should exclude
	 * the mkfs hidden dmap page.
	 */

	/* update mapsize */
	bmp->db_mapsize = newsize;
	bmp->db_maxlevel = BMAPSZTOLEV(bmp->db_mapsize);

	/* compute new AG size */
	l2agsize = dbGetL2AGSize(newsize);
	oldl2agsize = bmp->db_agl2size;

	bmp->db_agl2size = l2agsize;
	bmp->db_agsize = 1 << l2agsize;

	/* compute new number of AG; round up for a partial final AG */
	agno = bmp->db_numag;
	bmp->db_numag = newsize >> l2agsize;
	bmp->db_numag += ((u32) newsize % (u32) bmp->db_agsize) ? 1 : 0;

	/*
	 *	reconfigure db_agfree[]
	 * from old AG configuration to new AG configuration;
	 *
	 * coalesce contiguous k (newAGSize/oldAGSize) AGs;
	 * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn;
	 * note: new AG size = old AG size * (2**x).
	 */
	if (l2agsize == oldl2agsize)
		goto extend;
	k = 1 << (l2agsize - oldl2agsize);
	/* agfree[0] is both a source and the first collection point,
	 * so save it before it is zeroed and re-add it afterwards.
	 */
	ag_rem = bmp->db_agfree[0];	/* save agfree[0] */
	for (i = 0, n = 0; i < agno; n++) {
		bmp->db_agfree[n] = 0;	/* init collection point */

		/* coalesce cotiguous k AGs; */
		for (j = 0; j < k && i < agno; j++, i++) {
			/* merge AGi to AGn */
			bmp->db_agfree[n] += bmp->db_agfree[i];
		}
	}
	bmp->db_agfree[0] += ag_rem;	/* restore agfree[0] */

	/* zero the now-unused tail of the per-AG free array */
	for (; n < MAXAG; n++)
		bmp->db_agfree[n] = 0;

	/*
	 * update highest active ag number
	 */

	bmp->db_maxag = bmp->db_maxag / k;

	/*
	 *	extend bmap
	 *
	 * update bit maps and corresponding level control pages;
	 * global control page db_nfree, db_agfree[agno], db_maxfreebud;
	 */
      extend:
	/* get L2 page (single top-level control page, right after BMAPBLKNO) */
	p = BMAPBLKNO + nbperpage;	/* L2 page */
	l2mp = read_metapage(ipbmap, p, PSIZE, 0);
	if (!l2mp) {
		jfs_error(ipbmap->i_sb, "dbExtendFS: L2 page could not be read");
		return -EIO;
	}
	l2dcp = (struct dmapctl *) l2mp->data;

	/* compute start L1 */
	k = blkno >> L2MAXL1SIZE;
	l2leaf = l2dcp->stree + CTLLEAFIND + k;
	p = BLKTOL1(blkno, sbi->l2nbperpage);	/* L1 page */

	/*
	 * extend each L1 in L2
	 */
	for (; k < LPERCTL; k++, p += nbperpage) {
		/* get L1 page */
		if (j0) {
			/* first L1: read in L1 page: (blkno & (MAXL1SIZE - 1)) */
			l1mp = read_metapage(ipbmap, p, PSIZE, 0);
			if (l1mp == NULL)
				goto errout;
			l1dcp = (struct dmapctl *) l1mp->data;

			/* compute start L0 */
			j = (blkno & (MAXL1SIZE - 1)) >> L2MAXL0SIZE;
			l1leaf = l1dcp->stree + CTLLEAFIND + j;
			p = BLKTOL0(blkno, sbi->l2nbperpage);
			j0 = false;
		} else {
			/* subsequent L1s are new: assign/init L1 page */
			l1mp = get_metapage(ipbmap, p, PSIZE, 0);
			if (l1mp == NULL)
				goto errout;

			l1dcp = (struct dmapctl *) l1mp->data;

			/* compute start L0 */
			j = 0;
			l1leaf = l1dcp->stree + CTLLEAFIND;
			p += nbperpage;	/* 1st L0 of L1.k */
		}

		/*
		 * extend each L0 in L1
		 */
		for (; j < LPERCTL; j++) {
			/* get L0 page */
			if (i0) {
				/* first L0: read in L0 page:
				 * (blkno & (MAXL0SIZE - 1))
				 */
				l0mp = read_metapage(ipbmap, p, PSIZE, 0);
				if (l0mp == NULL)
					goto errout;
				l0dcp = (struct dmapctl *) l0mp->data;

				/* compute start dmap */
				i = (blkno & (MAXL0SIZE - 1)) >>
				    L2BPERDMAP;
				l0leaf = l0dcp->stree + CTLLEAFIND + i;
				p = BLKTODMAP(blkno,
					      sbi->l2nbperpage);
				i0 = false;
			} else {
				/* subsequent L0s are new: assign/init L0 page */
				l0mp = get_metapage(ipbmap, p, PSIZE, 0);
				if (l0mp == NULL)
					goto errout;

				l0dcp = (struct dmapctl *) l0mp->data;

				/* compute start dmap */
				i = 0;
				l0leaf = l0dcp->stree + CTLLEAFIND;
				p += nbperpage;	/* 1st dmap of L0.j */
			}

			/*
			 * extend each dmap in L0
			 */
			for (; i < LPERCTL; i++) {
				/*
				 * reconstruct the dmap page, and
				 * initialize corresponding parent L0 leaf
				 */
				if ((n = blkno & (BPERDMAP - 1))) {
					/* blkno is mid-dmap:
					 * read in dmap page:
					 */
					mp = read_metapage(ipbmap, p,
							   PSIZE, 0);
					if (mp == NULL)
						goto errout;
					n = min(nblocks, (s64)BPERDMAP - n);
				} else {
					/* assign/init dmap page
					 * NOTE(review): this branch also uses
					 * read_metapage despite the comment —
					 * confirm whether get_metapage was
					 * intended here.
					 */
					mp = read_metapage(ipbmap, p,
							   PSIZE, 0);
					if (mp == NULL)
						goto errout;

					n = min(nblocks, (s64)BPERDMAP);
				}

				dp = (struct dmap *) mp->data;
				/* leaf value = summary of this dmap's free space */
				*l0leaf = dbInitDmap(dp, blkno, n);

				bmp->db_nfree += n;
				agno = le64_to_cpu(dp->start) >> l2agsize;
				bmp->db_agfree[agno] += n;

				write_metapage(mp);

				l0leaf++;
				p += nbperpage;

				blkno += n;
				nblocks -= n;
				if (nblocks == 0)
					break;
			}	/* for each dmap in a L0 */

			/*
			 * build current L0 page from its leaves, and
			 * initialize corresponding parent L1 leaf
			 */
			*l1leaf = dbInitDmapCtl(l0dcp, 0, ++i);
			write_metapage(l0mp);
			l0mp = NULL;	/* ownership passed; avoid double release */

			if (nblocks)
				l1leaf++;	/* continue for next L0 */
			else {
				/* more than 1 L0 ? */
				if (j > 0)
					break;	/* build L1 page */
				else {
					/* summarize in global bmap page */
					bmp->db_maxfreebud = *l1leaf;
					release_metapage(l1mp);
					release_metapage(l2mp);
					goto finalize;
				}
			}
		}		/* for each L0 in a L1 */

		/*
		 * build current L1 page from its leaves, and
		 * initialize corresponding parent L2 leaf
		 */
		*l2leaf = dbInitDmapCtl(l1dcp, 1, ++j);
		write_metapage(l1mp);
		l1mp = NULL;	/* ownership passed; avoid double release */

		if (nblocks)
			l2leaf++;	/* continue for next L1 */
		else {
			/* more than 1 L1 ? */
			if (k > 0)
				break;	/* build L2 page */
			else {
				/* summarize in global bmap page */
				bmp->db_maxfreebud = *l2leaf;
				release_metapage(l2mp);
				goto finalize;
			}
		}
	}			/* for each L1 in a L2 */

	/* nblocks should always reach 0 inside the loops above */
	jfs_error(ipbmap->i_sb,
		  "dbExtendFS: function has not returned as expected");
      errout:
	/* release any control pages still held on the failure path */
	if (l0mp)
		release_metapage(l0mp);
	if (l1mp)
		release_metapage(l1mp);
	release_metapage(l2mp);
	return -EIO;

	/*
	 *	finalize bmap control page
	 */
      finalize:

	return 0;
}
3550 | 3550 | ||
3551 | 3551 | ||
3552 | /* | 3552 | /* |
3553 | * dbFinalizeBmap() | 3553 | * dbFinalizeBmap() |
3554 | */ | 3554 | */ |
3555 | void dbFinalizeBmap(struct inode *ipbmap) | 3555 | void dbFinalizeBmap(struct inode *ipbmap) |
3556 | { | 3556 | { |
3557 | struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap; | 3557 | struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap; |
3558 | int actags, inactags, l2nl; | 3558 | int actags, inactags, l2nl; |
3559 | s64 ag_rem, actfree, inactfree, avgfree; | 3559 | s64 ag_rem, actfree, inactfree, avgfree; |
3560 | int i, n; | 3560 | int i, n; |
3561 | 3561 | ||
3562 | /* | 3562 | /* |
3563 | * finalize bmap control page | 3563 | * finalize bmap control page |
3564 | */ | 3564 | */ |
3565 | //finalize: | 3565 | //finalize: |
3566 | /* | 3566 | /* |
3567 | * compute db_agpref: preferred ag to allocate from | 3567 | * compute db_agpref: preferred ag to allocate from |
3568 | * (the leftmost ag with average free space in it); | 3568 | * (the leftmost ag with average free space in it); |
3569 | */ | 3569 | */ |
3570 | //agpref: | 3570 | //agpref: |
3571 | /* get the number of active ags and inacitve ags */ | 3571 | /* get the number of active ags and inacitve ags */ |
3572 | actags = bmp->db_maxag + 1; | 3572 | actags = bmp->db_maxag + 1; |
3573 | inactags = bmp->db_numag - actags; | 3573 | inactags = bmp->db_numag - actags; |
3574 | ag_rem = bmp->db_mapsize & (bmp->db_agsize - 1); /* ??? */ | 3574 | ag_rem = bmp->db_mapsize & (bmp->db_agsize - 1); /* ??? */ |
3575 | 3575 | ||
3576 | /* determine how many blocks are in the inactive allocation | 3576 | /* determine how many blocks are in the inactive allocation |
3577 | * groups. in doing this, we must account for the fact that | 3577 | * groups. in doing this, we must account for the fact that |
3578 | * the rightmost group might be a partial group (i.e. file | 3578 | * the rightmost group might be a partial group (i.e. file |
3579 | * system size is not a multiple of the group size). | 3579 | * system size is not a multiple of the group size). |
3580 | */ | 3580 | */ |
3581 | inactfree = (inactags && ag_rem) ? | 3581 | inactfree = (inactags && ag_rem) ? |
3582 | ((inactags - 1) << bmp->db_agl2size) + ag_rem | 3582 | ((inactags - 1) << bmp->db_agl2size) + ag_rem |
3583 | : inactags << bmp->db_agl2size; | 3583 | : inactags << bmp->db_agl2size; |
3584 | 3584 | ||
3585 | /* determine how many free blocks are in the active | 3585 | /* determine how many free blocks are in the active |
3586 | * allocation groups plus the average number of free blocks | 3586 | * allocation groups plus the average number of free blocks |
3587 | * within the active ags. | 3587 | * within the active ags. |
3588 | */ | 3588 | */ |
3589 | actfree = bmp->db_nfree - inactfree; | 3589 | actfree = bmp->db_nfree - inactfree; |
3590 | avgfree = (u32) actfree / (u32) actags; | 3590 | avgfree = (u32) actfree / (u32) actags; |
3591 | 3591 | ||
3592 | /* if the preferred allocation group has not average free space. | 3592 | /* if the preferred allocation group has not average free space. |
3593 | * re-establish the preferred group as the leftmost | 3593 | * re-establish the preferred group as the leftmost |
3594 | * group with average free space. | 3594 | * group with average free space. |
3595 | */ | 3595 | */ |
3596 | if (bmp->db_agfree[bmp->db_agpref] < avgfree) { | 3596 | if (bmp->db_agfree[bmp->db_agpref] < avgfree) { |
3597 | for (bmp->db_agpref = 0; bmp->db_agpref < actags; | 3597 | for (bmp->db_agpref = 0; bmp->db_agpref < actags; |
3598 | bmp->db_agpref++) { | 3598 | bmp->db_agpref++) { |
3599 | if (bmp->db_agfree[bmp->db_agpref] >= avgfree) | 3599 | if (bmp->db_agfree[bmp->db_agpref] >= avgfree) |
3600 | break; | 3600 | break; |
3601 | } | 3601 | } |
3602 | if (bmp->db_agpref >= bmp->db_numag) { | 3602 | if (bmp->db_agpref >= bmp->db_numag) { |
3603 | jfs_error(ipbmap->i_sb, | 3603 | jfs_error(ipbmap->i_sb, |
3604 | "cannot find ag with average freespace"); | 3604 | "cannot find ag with average freespace"); |
3605 | } | 3605 | } |
3606 | } | 3606 | } |
3607 | 3607 | ||
3608 | /* | 3608 | /* |
3609 | * compute db_aglevel, db_agheigth, db_width, db_agstart: | 3609 | * compute db_aglevel, db_agheigth, db_width, db_agstart: |
3610 | * an ag is covered in aglevel dmapctl summary tree, | 3610 | * an ag is covered in aglevel dmapctl summary tree, |
3611 | * at agheight level height (from leaf) with agwidth number of nodes | 3611 | * at agheight level height (from leaf) with agwidth number of nodes |
3612 | * each, which starts at agstart index node of the smmary tree node | 3612 | * each, which starts at agstart index node of the smmary tree node |
3613 | * array; | 3613 | * array; |
3614 | */ | 3614 | */ |
3615 | bmp->db_aglevel = BMAPSZTOLEV(bmp->db_agsize); | 3615 | bmp->db_aglevel = BMAPSZTOLEV(bmp->db_agsize); |
3616 | l2nl = | 3616 | l2nl = |
3617 | bmp->db_agl2size - (L2BPERDMAP + bmp->db_aglevel * L2LPERCTL); | 3617 | bmp->db_agl2size - (L2BPERDMAP + bmp->db_aglevel * L2LPERCTL); |
3618 | bmp->db_agheigth = l2nl >> 1; | 3618 | bmp->db_agheigth = l2nl >> 1; |
3619 | bmp->db_agwidth = 1 << (l2nl - (bmp->db_agheigth << 1)); | 3619 | bmp->db_agwidth = 1 << (l2nl - (bmp->db_agheigth << 1)); |
3620 | for (i = 5 - bmp->db_agheigth, bmp->db_agstart = 0, n = 1; i > 0; | 3620 | for (i = 5 - bmp->db_agheigth, bmp->db_agstart = 0, n = 1; i > 0; |
3621 | i--) { | 3621 | i--) { |
3622 | bmp->db_agstart += n; | 3622 | bmp->db_agstart += n; |
3623 | n <<= 2; | 3623 | n <<= 2; |
3624 | } | 3624 | } |
3625 | 3625 | ||
3626 | } | 3626 | } |
3627 | 3627 | ||
3628 | 3628 | ||
/*
 * NAME:	dbInitDmap()/ujfs_idmap_page()
 *
 * FUNCTION:	initialize working/persistent bitmap of the dmap page
 *		for the specified number of blocks:
 *
 *		at entry, the bitmaps had been initialized as free (ZEROS);
 *		The number of blocks will only account for the actually
 *		existing blocks. Blocks which don't actually exist in
 *		the aggregate will be marked as allocated (ONES);
 *
 * PARAMETERS:
 *	dp	- pointer to page of map
 *	Blkno	- starting block number covered by this dmap
 *	nblocks	- number of blocks this page
 *
 * RETURNS:	summary-tree root value for this dmap (stored by the
 *		caller into the corresponding parent L0 leaf)
 */
3646 | static int dbInitDmap(struct dmap * dp, s64 Blkno, int nblocks) | 3646 | static int dbInitDmap(struct dmap * dp, s64 Blkno, int nblocks) |
3647 | { | 3647 | { |
3648 | int blkno, w, b, r, nw, nb, i; | 3648 | int blkno, w, b, r, nw, nb, i; |
3649 | 3649 | ||
3650 | /* starting block number within the dmap */ | 3650 | /* starting block number within the dmap */ |
3651 | blkno = Blkno & (BPERDMAP - 1); | 3651 | blkno = Blkno & (BPERDMAP - 1); |
3652 | 3652 | ||
3653 | if (blkno == 0) { | 3653 | if (blkno == 0) { |
3654 | dp->nblocks = dp->nfree = cpu_to_le32(nblocks); | 3654 | dp->nblocks = dp->nfree = cpu_to_le32(nblocks); |
3655 | dp->start = cpu_to_le64(Blkno); | 3655 | dp->start = cpu_to_le64(Blkno); |
3656 | 3656 | ||
3657 | if (nblocks == BPERDMAP) { | 3657 | if (nblocks == BPERDMAP) { |
3658 | memset(&dp->wmap[0], 0, LPERDMAP * 4); | 3658 | memset(&dp->wmap[0], 0, LPERDMAP * 4); |
3659 | memset(&dp->pmap[0], 0, LPERDMAP * 4); | 3659 | memset(&dp->pmap[0], 0, LPERDMAP * 4); |
3660 | goto initTree; | 3660 | goto initTree; |
3661 | } | 3661 | } |
3662 | } else { | 3662 | } else { |
3663 | dp->nblocks = | 3663 | le32_add_cpu(&dp->nblocks, nblocks); |
3664 | cpu_to_le32(le32_to_cpu(dp->nblocks) + nblocks); | 3664 | le32_add_cpu(&dp->nfree, nblocks); |
3665 | dp->nfree = cpu_to_le32(le32_to_cpu(dp->nfree) + nblocks); | ||
3666 | } | 3665 | } |
3667 | 3666 | ||
3668 | /* word number containing start block number */ | 3667 | /* word number containing start block number */ |
3669 | w = blkno >> L2DBWORD; | 3668 | w = blkno >> L2DBWORD; |
3670 | 3669 | ||
3671 | /* | 3670 | /* |
3672 | * free the bits corresponding to the block range (ZEROS): | 3671 | * free the bits corresponding to the block range (ZEROS): |
3673 | * note: not all bits of the first and last words may be contained | 3672 | * note: not all bits of the first and last words may be contained |
3674 | * within the block range. | 3673 | * within the block range. |
3675 | */ | 3674 | */ |
3676 | for (r = nblocks; r > 0; r -= nb, blkno += nb) { | 3675 | for (r = nblocks; r > 0; r -= nb, blkno += nb) { |
3677 | /* number of bits preceding range to be freed in the word */ | 3676 | /* number of bits preceding range to be freed in the word */ |
3678 | b = blkno & (DBWORD - 1); | 3677 | b = blkno & (DBWORD - 1); |
3679 | /* number of bits to free in the word */ | 3678 | /* number of bits to free in the word */ |
3680 | nb = min(r, DBWORD - b); | 3679 | nb = min(r, DBWORD - b); |
3681 | 3680 | ||
3682 | /* is partial word to be freed ? */ | 3681 | /* is partial word to be freed ? */ |
3683 | if (nb < DBWORD) { | 3682 | if (nb < DBWORD) { |
3684 | /* free (set to 0) from the bitmap word */ | 3683 | /* free (set to 0) from the bitmap word */ |
3685 | dp->wmap[w] &= cpu_to_le32(~(ONES << (DBWORD - nb) | 3684 | dp->wmap[w] &= cpu_to_le32(~(ONES << (DBWORD - nb) |
3686 | >> b)); | 3685 | >> b)); |
3687 | dp->pmap[w] &= cpu_to_le32(~(ONES << (DBWORD - nb) | 3686 | dp->pmap[w] &= cpu_to_le32(~(ONES << (DBWORD - nb) |
3688 | >> b)); | 3687 | >> b)); |
3689 | 3688 | ||
3690 | /* skip the word freed */ | 3689 | /* skip the word freed */ |
3691 | w++; | 3690 | w++; |
3692 | } else { | 3691 | } else { |
3693 | /* free (set to 0) contiguous bitmap words */ | 3692 | /* free (set to 0) contiguous bitmap words */ |
3694 | nw = r >> L2DBWORD; | 3693 | nw = r >> L2DBWORD; |
3695 | memset(&dp->wmap[w], 0, nw * 4); | 3694 | memset(&dp->wmap[w], 0, nw * 4); |
3696 | memset(&dp->pmap[w], 0, nw * 4); | 3695 | memset(&dp->pmap[w], 0, nw * 4); |
3697 | 3696 | ||
3698 | /* skip the words freed */ | 3697 | /* skip the words freed */ |
3699 | nb = nw << L2DBWORD; | 3698 | nb = nw << L2DBWORD; |
3700 | w += nw; | 3699 | w += nw; |
3701 | } | 3700 | } |
3702 | } | 3701 | } |
3703 | 3702 | ||
3704 | /* | 3703 | /* |
3705 | * mark bits following the range to be freed (non-existing | 3704 | * mark bits following the range to be freed (non-existing |
3706 | * blocks) as allocated (ONES) | 3705 | * blocks) as allocated (ONES) |
3707 | */ | 3706 | */ |
3708 | 3707 | ||
3709 | if (blkno == BPERDMAP) | 3708 | if (blkno == BPERDMAP) |
3710 | goto initTree; | 3709 | goto initTree; |
3711 | 3710 | ||
3712 | /* the first word beyond the end of existing blocks */ | 3711 | /* the first word beyond the end of existing blocks */ |
3713 | w = blkno >> L2DBWORD; | 3712 | w = blkno >> L2DBWORD; |
3714 | 3713 | ||
3715 | /* does nblocks fall on a 32-bit boundary ? */ | 3714 | /* does nblocks fall on a 32-bit boundary ? */ |
3716 | b = blkno & (DBWORD - 1); | 3715 | b = blkno & (DBWORD - 1); |
3717 | if (b) { | 3716 | if (b) { |
3718 | /* mark a partial word allocated */ | 3717 | /* mark a partial word allocated */ |
3719 | dp->wmap[w] = dp->pmap[w] = cpu_to_le32(ONES >> b); | 3718 | dp->wmap[w] = dp->pmap[w] = cpu_to_le32(ONES >> b); |
3720 | w++; | 3719 | w++; |
3721 | } | 3720 | } |
3722 | 3721 | ||
3723 | /* set the rest of the words in the page to allocated (ONES) */ | 3722 | /* set the rest of the words in the page to allocated (ONES) */ |
3724 | for (i = w; i < LPERDMAP; i++) | 3723 | for (i = w; i < LPERDMAP; i++) |
3725 | dp->pmap[i] = dp->wmap[i] = cpu_to_le32(ONES); | 3724 | dp->pmap[i] = dp->wmap[i] = cpu_to_le32(ONES); |
3726 | 3725 | ||
3727 | /* | 3726 | /* |
3728 | * init tree | 3727 | * init tree |
3729 | */ | 3728 | */ |
3730 | initTree: | 3729 | initTree: |
3731 | return (dbInitDmapTree(dp)); | 3730 | return (dbInitDmapTree(dp)); |
3732 | } | 3731 | } |
3733 | 3732 | ||
3734 | 3733 | ||
3735 | /* | 3734 | /* |
3736 | * NAME: dbInitDmapTree()/ujfs_complete_dmap() | 3735 | * NAME: dbInitDmapTree()/ujfs_complete_dmap() |
3737 | * | 3736 | * |
3738 | * FUNCTION: initialize summary tree of the specified dmap: | 3737 | * FUNCTION: initialize summary tree of the specified dmap: |
3739 | * | 3738 | * |
3740 | * at entry, bitmap of the dmap has been initialized; | 3739 | * at entry, bitmap of the dmap has been initialized; |
3741 | * | 3740 | * |
3742 | * PARAMETERS: | 3741 | * PARAMETERS: |
3743 | * dp - dmap to complete | 3742 | * dp - dmap to complete |
3744 | * blkno - starting block number for this dmap | 3743 | * blkno - starting block number for this dmap |
3745 | * treemax - will be filled in with max free for this dmap | 3744 | * treemax - will be filled in with max free for this dmap |
3746 | * | 3745 | * |
3747 | * RETURNS: max free string at the root of the tree | 3746 | * RETURNS: max free string at the root of the tree |
3748 | */ | 3747 | */ |
3749 | static int dbInitDmapTree(struct dmap * dp) | 3748 | static int dbInitDmapTree(struct dmap * dp) |
3750 | { | 3749 | { |
3751 | struct dmaptree *tp; | 3750 | struct dmaptree *tp; |
3752 | s8 *cp; | 3751 | s8 *cp; |
3753 | int i; | 3752 | int i; |
3754 | 3753 | ||
3755 | /* init fixed info of tree */ | 3754 | /* init fixed info of tree */ |
3756 | tp = &dp->tree; | 3755 | tp = &dp->tree; |
3757 | tp->nleafs = cpu_to_le32(LPERDMAP); | 3756 | tp->nleafs = cpu_to_le32(LPERDMAP); |
3758 | tp->l2nleafs = cpu_to_le32(L2LPERDMAP); | 3757 | tp->l2nleafs = cpu_to_le32(L2LPERDMAP); |
3759 | tp->leafidx = cpu_to_le32(LEAFIND); | 3758 | tp->leafidx = cpu_to_le32(LEAFIND); |
3760 | tp->height = cpu_to_le32(4); | 3759 | tp->height = cpu_to_le32(4); |
3761 | tp->budmin = BUDMIN; | 3760 | tp->budmin = BUDMIN; |
3762 | 3761 | ||
3763 | /* init each leaf from corresponding wmap word: | 3762 | /* init each leaf from corresponding wmap word: |
3764 | * note: leaf is set to NOFREE(-1) if all blocks of corresponding | 3763 | * note: leaf is set to NOFREE(-1) if all blocks of corresponding |
3765 | * bitmap word are allocated. | 3764 | * bitmap word are allocated. |
3766 | */ | 3765 | */ |
3767 | cp = tp->stree + le32_to_cpu(tp->leafidx); | 3766 | cp = tp->stree + le32_to_cpu(tp->leafidx); |
3768 | for (i = 0; i < LPERDMAP; i++) | 3767 | for (i = 0; i < LPERDMAP; i++) |
3769 | *cp++ = dbMaxBud((u8 *) & dp->wmap[i]); | 3768 | *cp++ = dbMaxBud((u8 *) & dp->wmap[i]); |
3770 | 3769 | ||
3771 | /* build the dmap's binary buddy summary tree */ | 3770 | /* build the dmap's binary buddy summary tree */ |
3772 | return (dbInitTree(tp)); | 3771 | return (dbInitTree(tp)); |
3773 | } | 3772 | } |
3774 | 3773 | ||
3775 | 3774 | ||
/*
 * NAME:	dbInitTree()/ujfs_adjtree()
 *
 * FUNCTION:	initialize binary buddy summary tree of a dmap or dmapctl.
 *
 *		at entry, the leaves of the tree have been initialized
 *		from the corresponding bitmap word or root of the summary
 *		tree of the child control page;
 *		configure the binary buddy system at the leaf level, then
 *		bubble up the values of the leaf nodes up the tree.
 *
 * PARAMETERS:
 *	dtp	- pointer to the dmap/dmapctl tree to initialize
 *		  (nleafs/l2nleafs/leafidx/budmin must already be set)
 *
 * RETURNS: max free string at the root of the tree
 */
static int dbInitTree(struct dmaptree * dtp)
{
	int l2max, l2free, bsize, nextb, i;
	int child, parent, nparent;
	s8 *tp, *cp, *cp1;

	tp = dtp->stree;

	/* Determine the maximum free string possible for the leaves */
	l2max = le32_to_cpu(dtp->l2nleafs) + dtp->budmin;

	/*
	 * configure the leaf level into binary buddy system
	 *
	 * Try to combine buddies starting with a buddy size of 1
	 * (i.e. two leaves). At a buddy size of 1 two buddy leaves
	 * can be combined if both buddies have a maximum free of l2min;
	 * the combination will result in the left-most buddy leaf having
	 * a maximum free of l2min+1.
	 * After processing all buddies for a given size, process buddies
	 * at the next higher buddy size (i.e. current size * 2) and
	 * the next maximum free (current free + 1).
	 * This continues until the maximum possible buddy combination
	 * yields maximum free.
	 */
	for (l2free = dtp->budmin, bsize = 1; l2free < l2max;
	     l2free++, bsize = nextb) {
		/* get next buddy size == current buddy pair size */
		nextb = bsize << 1;

		/* scan each adjacent buddy pair at current buddy size */
		for (i = 0, cp = tp + le32_to_cpu(dtp->leafidx);
		     i < le32_to_cpu(dtp->nleafs);
		     i += nextb, cp += nextb) {
			/* coalesce if both adjacent buddies are max free */
			if (*cp == l2free && *(cp + bsize) == l2free) {
				*cp = l2free + 1;	/* left take right */
				*(cp + bsize) = -1;	/* right give left */
			}
		}
	}

	/*
	 * bubble summary information of leaves up the tree.
	 *
	 * Starting at the leaf node level, the four nodes described by
	 * the higher level parent node are compared for a maximum free and
	 * this maximum becomes the value of the parent node.
	 * when all lower level nodes are processed in this fashion then
	 * move up to the next level (parent becomes a lower level node) and
	 * continue the process for that level.
	 */
	for (child = le32_to_cpu(dtp->leafidx),
	     nparent = le32_to_cpu(dtp->nleafs) >> 2;
	     nparent > 0; nparent >>= 2, child = parent) {
		/* get index of 1st node of parent level */
		parent = (child - 1) >> 2;

		/* set the value of the parent node as the maximum
		 * of the four nodes of the current level.
		 */
		for (i = 0, cp = tp + child, cp1 = tp + parent;
		     i < nparent; i++, cp += 4, cp1++)
			*cp1 = TREEMAX(cp);
	}

	/* root of the tree now holds the overall max free string */
	return (*tp);
}
3863 | 3862 | ||
3864 | 3863 | ||
3865 | /* | 3864 | /* |
3866 | * dbInitDmapCtl() | 3865 | * dbInitDmapCtl() |
3867 | * | 3866 | * |
3868 | * function: initialize dmapctl page | 3867 | * function: initialize dmapctl page |
3869 | */ | 3868 | */ |
3870 | static int dbInitDmapCtl(struct dmapctl * dcp, int level, int i) | 3869 | static int dbInitDmapCtl(struct dmapctl * dcp, int level, int i) |
3871 | { /* start leaf index not covered by range */ | 3870 | { /* start leaf index not covered by range */ |
3872 | s8 *cp; | 3871 | s8 *cp; |
3873 | 3872 | ||
3874 | dcp->nleafs = cpu_to_le32(LPERCTL); | 3873 | dcp->nleafs = cpu_to_le32(LPERCTL); |
3875 | dcp->l2nleafs = cpu_to_le32(L2LPERCTL); | 3874 | dcp->l2nleafs = cpu_to_le32(L2LPERCTL); |
3876 | dcp->leafidx = cpu_to_le32(CTLLEAFIND); | 3875 | dcp->leafidx = cpu_to_le32(CTLLEAFIND); |
3877 | dcp->height = cpu_to_le32(5); | 3876 | dcp->height = cpu_to_le32(5); |
3878 | dcp->budmin = L2BPERDMAP + L2LPERCTL * level; | 3877 | dcp->budmin = L2BPERDMAP + L2LPERCTL * level; |
3879 | 3878 | ||
3880 | /* | 3879 | /* |
3881 | * initialize the leaves of current level that were not covered | 3880 | * initialize the leaves of current level that were not covered |
3882 | * by the specified input block range (i.e. the leaves have no | 3881 | * by the specified input block range (i.e. the leaves have no |
3883 | * low level dmapctl or dmap). | 3882 | * low level dmapctl or dmap). |
3884 | */ | 3883 | */ |
3885 | cp = &dcp->stree[CTLLEAFIND + i]; | 3884 | cp = &dcp->stree[CTLLEAFIND + i]; |
3886 | for (; i < LPERCTL; i++) | 3885 | for (; i < LPERCTL; i++) |
3887 | *cp++ = NOFREE; | 3886 | *cp++ = NOFREE; |
3888 | 3887 | ||
3889 | /* build the dmap's binary buddy summary tree */ | 3888 | /* build the dmap's binary buddy summary tree */ |
3890 | return (dbInitTree((struct dmaptree *) dcp)); | 3889 | return (dbInitTree((struct dmaptree *) dcp)); |
3891 | } | 3890 | } |
3892 | 3891 | ||
3893 | 3892 | ||
3894 | /* | 3893 | /* |
3895 | * NAME: dbGetL2AGSize()/ujfs_getagl2size() | 3894 | * NAME: dbGetL2AGSize()/ujfs_getagl2size() |
3896 | * | 3895 | * |
3897 | * FUNCTION: Determine log2(allocation group size) from aggregate size | 3896 | * FUNCTION: Determine log2(allocation group size) from aggregate size |
3898 | * | 3897 | * |
3899 | * PARAMETERS: | 3898 | * PARAMETERS: |
3900 | * nblocks - Number of blocks in aggregate | 3899 | * nblocks - Number of blocks in aggregate |
3901 | * | 3900 | * |
3902 | * RETURNS: log2(allocation group size) in aggregate blocks | 3901 | * RETURNS: log2(allocation group size) in aggregate blocks |
3903 | */ | 3902 | */ |
3904 | static int dbGetL2AGSize(s64 nblocks) | 3903 | static int dbGetL2AGSize(s64 nblocks) |
3905 | { | 3904 | { |
3906 | s64 sz; | 3905 | s64 sz; |
3907 | s64 m; | 3906 | s64 m; |
3908 | int l2sz; | 3907 | int l2sz; |
3909 | 3908 | ||
3910 | if (nblocks < BPERDMAP * MAXAG) | 3909 | if (nblocks < BPERDMAP * MAXAG) |
3911 | return (L2BPERDMAP); | 3910 | return (L2BPERDMAP); |
3912 | 3911 | ||
3913 | /* round up aggregate size to power of 2 */ | 3912 | /* round up aggregate size to power of 2 */ |
3914 | m = ((u64) 1 << (64 - 1)); | 3913 | m = ((u64) 1 << (64 - 1)); |
3915 | for (l2sz = 64; l2sz >= 0; l2sz--, m >>= 1) { | 3914 | for (l2sz = 64; l2sz >= 0; l2sz--, m >>= 1) { |
3916 | if (m & nblocks) | 3915 | if (m & nblocks) |
3917 | break; | 3916 | break; |
3918 | } | 3917 | } |
3919 | 3918 | ||
3920 | sz = (s64) 1 << l2sz; | 3919 | sz = (s64) 1 << l2sz; |
3921 | if (sz < nblocks) | 3920 | if (sz < nblocks) |
3922 | l2sz += 1; | 3921 | l2sz += 1; |
3923 | 3922 | ||
3924 | /* agsize = roundupSize/max_number_of_ag */ | 3923 | /* agsize = roundupSize/max_number_of_ag */ |
3925 | return (l2sz - L2MAXAG); | 3924 | return (l2sz - L2MAXAG); |
3926 | } | 3925 | } |
3927 | 3926 | ||
3928 | 3927 | ||
3929 | /* | 3928 | /* |
3930 | * NAME: dbMapFileSizeToMapSize() | 3929 | * NAME: dbMapFileSizeToMapSize() |
3931 | * | 3930 | * |
3932 | * FUNCTION: compute number of blocks the block allocation map file | 3931 | * FUNCTION: compute number of blocks the block allocation map file |
3933 | * can cover from the map file size; | 3932 | * can cover from the map file size; |
3934 | * | 3933 | * |
3935 | * RETURNS: Number of blocks which can be covered by this block map file; | 3934 | * RETURNS: Number of blocks which can be covered by this block map file; |
3936 | */ | 3935 | */ |
3937 | 3936 | ||
3938 | /* | 3937 | /* |
3939 | * maximum number of map pages at each level including control pages | 3938 | * maximum number of map pages at each level including control pages |
3940 | */ | 3939 | */ |
3941 | #define MAXL0PAGES (1 + LPERCTL) | 3940 | #define MAXL0PAGES (1 + LPERCTL) |
3942 | #define MAXL1PAGES (1 + LPERCTL * MAXL0PAGES) | 3941 | #define MAXL1PAGES (1 + LPERCTL * MAXL0PAGES) |
3943 | #define MAXL2PAGES (1 + LPERCTL * MAXL1PAGES) | 3942 | #define MAXL2PAGES (1 + LPERCTL * MAXL1PAGES) |
3944 | 3943 | ||
3945 | /* | 3944 | /* |
3946 | * convert number of map pages to the zero origin top dmapctl level | 3945 | * convert number of map pages to the zero origin top dmapctl level |
3947 | */ | 3946 | */ |
3948 | #define BMAPPGTOLEV(npages) \ | 3947 | #define BMAPPGTOLEV(npages) \ |
3949 | (((npages) <= 3 + MAXL0PAGES) ? 0 : \ | 3948 | (((npages) <= 3 + MAXL0PAGES) ? 0 : \ |
3950 | ((npages) <= 2 + MAXL1PAGES) ? 1 : 2) | 3949 | ((npages) <= 2 + MAXL1PAGES) ? 1 : 2) |
3951 | 3950 | ||
/*
 * Compute the number of aggregate blocks the block allocation map file
 * can cover, from the map file's size.
 *
 * ipbmap	- inode of the block allocation map file
 *
 * Returns the number of blocks coverable by this map file.
 */
s64 dbMapFileSizeToMapSize(struct inode * ipbmap)
{
	struct super_block *sb = ipbmap->i_sb;
	s64 nblocks;
	s64 npages, ndmaps;
	int level, i;
	int complete, factor;

	/* map file size -> aggregate blocks -> map pages */
	nblocks = ipbmap->i_size >> JFS_SBI(sb)->l2bsize;
	npages = nblocks >> JFS_SBI(sb)->l2nbperpage;
	/* zero-origin top dmapctl level covered by that many pages */
	level = BMAPPGTOLEV(npages);

	/* At each level, accumulate the number of dmap pages covered by
	 * the number of full child levels below it;
	 * repeat for the last incomplete child level.
	 */
	ndmaps = 0;
	npages--;		/* skip the first global control page */
	/* skip higher level control pages above top level covered by map */
	npages -= (2 - level);
	npages--;	/* skip top level's control page */
	for (i = level; i >= 0; i--) {
		/* pages consumed by one full child subtree at this level */
		factor =
		    (i == 2) ? MAXL1PAGES : ((i == 1) ? MAXL0PAGES : 1);
		/* number of complete child subtrees at this level */
		complete = (u32) npages / factor;
		/* each complete child contributes a fixed dmap count */
		ndmaps += complete * ((i == 2) ? LPERCTL * LPERCTL :
				      ((i == 1) ? LPERCTL : 1));

		/* pages in last/incomplete child */
		npages = (u32) npages % factor;
		/* skip incomplete child's level control page */
		npages--;
	}

	/* convert the number of dmaps into the number of blocks
	 * which can be covered by the dmaps;
	 */
	nblocks = ndmaps << L2BPERDMAP;

	return (nblocks);
}
3993 | 3992 |
fs/jfs/jfs_imap.c
1 | /* | 1 | /* |
2 | * Copyright (C) International Business Machines Corp., 2000-2004 | 2 | * Copyright (C) International Business Machines Corp., 2000-2004 |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify | 4 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License as published by | 5 | * it under the terms of the GNU General Public License as published by |
6 | * the Free Software Foundation; either version 2 of the License, or | 6 | * the Free Software Foundation; either version 2 of the License, or |
7 | * (at your option) any later version. | 7 | * (at your option) any later version. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it will be useful, | 9 | * This program is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
12 | * the GNU General Public License for more details. | 12 | * the GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write to the Free Software | 15 | * along with this program; if not, write to the Free Software |
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
17 | */ | 17 | */ |
18 | 18 | ||
19 | /* | 19 | /* |
20 | * jfs_imap.c: inode allocation map manager | 20 | * jfs_imap.c: inode allocation map manager |
21 | * | 21 | * |
22 | * Serialization: | 22 | * Serialization: |
23 | * Each AG has a simple lock which is used to control the serialization of | 23 | * Each AG has a simple lock which is used to control the serialization of |
24 | * the AG level lists. This lock should be taken first whenever an AG | 24 | * the AG level lists. This lock should be taken first whenever an AG |
25 | * level list will be modified or accessed. | 25 | * level list will be modified or accessed. |
26 | * | 26 | * |
27 | * Each IAG is locked by obtaining the buffer for the IAG page. | 27 | * Each IAG is locked by obtaining the buffer for the IAG page. |
28 | * | 28 | * |
29 | * There is also a inode lock for the inode map inode. A read lock needs to | 29 | * There is also a inode lock for the inode map inode. A read lock needs to |
30 | * be taken whenever an IAG is read from the map or the global level | 30 | * be taken whenever an IAG is read from the map or the global level |
31 | * information is read. A write lock needs to be taken whenever the global | 31 | * information is read. A write lock needs to be taken whenever the global |
32 | * level information is modified or an atomic operation needs to be used. | 32 | * level information is modified or an atomic operation needs to be used. |
33 | * | 33 | * |
34 | * If more than one IAG is read at one time, the read lock may not | 34 | * If more than one IAG is read at one time, the read lock may not |
35 | * be given up until all of the IAG's are read. Otherwise, a deadlock | 35 | * be given up until all of the IAG's are read. Otherwise, a deadlock |
36 | * may occur when trying to obtain the read lock while another thread | 36 | * may occur when trying to obtain the read lock while another thread |
37 | * holding the read lock is waiting on the IAG already being held. | 37 | * holding the read lock is waiting on the IAG already being held. |
38 | * | 38 | * |
39 | * The control page of the inode map is read into memory by diMount(). | 39 | * The control page of the inode map is read into memory by diMount(). |
40 | * Thereafter it should only be modified in memory and then it will be | 40 | * Thereafter it should only be modified in memory and then it will be |
41 | * written out when the filesystem is unmounted by diUnmount(). | 41 | * written out when the filesystem is unmounted by diUnmount(). |
42 | */ | 42 | */ |
43 | 43 | ||
44 | #include <linux/fs.h> | 44 | #include <linux/fs.h> |
45 | #include <linux/buffer_head.h> | 45 | #include <linux/buffer_head.h> |
46 | #include <linux/pagemap.h> | 46 | #include <linux/pagemap.h> |
47 | #include <linux/quotaops.h> | 47 | #include <linux/quotaops.h> |
48 | 48 | ||
49 | #include "jfs_incore.h" | 49 | #include "jfs_incore.h" |
50 | #include "jfs_inode.h" | 50 | #include "jfs_inode.h" |
51 | #include "jfs_filsys.h" | 51 | #include "jfs_filsys.h" |
52 | #include "jfs_dinode.h" | 52 | #include "jfs_dinode.h" |
53 | #include "jfs_dmap.h" | 53 | #include "jfs_dmap.h" |
54 | #include "jfs_imap.h" | 54 | #include "jfs_imap.h" |
55 | #include "jfs_metapage.h" | 55 | #include "jfs_metapage.h" |
56 | #include "jfs_superblock.h" | 56 | #include "jfs_superblock.h" |
57 | #include "jfs_debug.h" | 57 | #include "jfs_debug.h" |
58 | 58 | ||
59 | /* | 59 | /* |
60 | * __mark_inode_dirty expects inodes to be hashed. Since we don't want | 60 | * __mark_inode_dirty expects inodes to be hashed. Since we don't want |
61 | * special inodes in the fileset inode space, we hash them to a dummy head | 61 | * special inodes in the fileset inode space, we hash them to a dummy head |
62 | */ | 62 | */ |
63 | static HLIST_HEAD(aggregate_hash); | 63 | static HLIST_HEAD(aggregate_hash); |
64 | 64 | ||
65 | /* | 65 | /* |
66 | * imap locks | 66 | * imap locks |
67 | */ | 67 | */ |
68 | /* iag free list lock */ | 68 | /* iag free list lock */ |
69 | #define IAGFREE_LOCK_INIT(imap) mutex_init(&imap->im_freelock) | 69 | #define IAGFREE_LOCK_INIT(imap) mutex_init(&imap->im_freelock) |
70 | #define IAGFREE_LOCK(imap) mutex_lock(&imap->im_freelock) | 70 | #define IAGFREE_LOCK(imap) mutex_lock(&imap->im_freelock) |
71 | #define IAGFREE_UNLOCK(imap) mutex_unlock(&imap->im_freelock) | 71 | #define IAGFREE_UNLOCK(imap) mutex_unlock(&imap->im_freelock) |
72 | 72 | ||
73 | /* per ag iag list locks */ | 73 | /* per ag iag list locks */ |
74 | #define AG_LOCK_INIT(imap,index) mutex_init(&(imap->im_aglock[index])) | 74 | #define AG_LOCK_INIT(imap,index) mutex_init(&(imap->im_aglock[index])) |
75 | #define AG_LOCK(imap,agno) mutex_lock(&imap->im_aglock[agno]) | 75 | #define AG_LOCK(imap,agno) mutex_lock(&imap->im_aglock[agno]) |
76 | #define AG_UNLOCK(imap,agno) mutex_unlock(&imap->im_aglock[agno]) | 76 | #define AG_UNLOCK(imap,agno) mutex_unlock(&imap->im_aglock[agno]) |
77 | 77 | ||
78 | /* | 78 | /* |
79 | * forward references | 79 | * forward references |
80 | */ | 80 | */ |
81 | static int diAllocAG(struct inomap *, int, bool, struct inode *); | 81 | static int diAllocAG(struct inomap *, int, bool, struct inode *); |
82 | static int diAllocAny(struct inomap *, int, bool, struct inode *); | 82 | static int diAllocAny(struct inomap *, int, bool, struct inode *); |
83 | static int diAllocBit(struct inomap *, struct iag *, int); | 83 | static int diAllocBit(struct inomap *, struct iag *, int); |
84 | static int diAllocExt(struct inomap *, int, struct inode *); | 84 | static int diAllocExt(struct inomap *, int, struct inode *); |
85 | static int diAllocIno(struct inomap *, int, struct inode *); | 85 | static int diAllocIno(struct inomap *, int, struct inode *); |
86 | static int diFindFree(u32, int); | 86 | static int diFindFree(u32, int); |
87 | static int diNewExt(struct inomap *, struct iag *, int); | 87 | static int diNewExt(struct inomap *, struct iag *, int); |
88 | static int diNewIAG(struct inomap *, int *, int, struct metapage **); | 88 | static int diNewIAG(struct inomap *, int *, int, struct metapage **); |
89 | static void duplicateIXtree(struct super_block *, s64, int, s64 *); | 89 | static void duplicateIXtree(struct super_block *, s64, int, s64 *); |
90 | 90 | ||
91 | static int diIAGRead(struct inomap * imap, int, struct metapage **); | 91 | static int diIAGRead(struct inomap * imap, int, struct metapage **); |
92 | static int copy_from_dinode(struct dinode *, struct inode *); | 92 | static int copy_from_dinode(struct dinode *, struct inode *); |
93 | static void copy_to_dinode(struct dinode *, struct inode *); | 93 | static void copy_to_dinode(struct dinode *, struct inode *); |
94 | 94 | ||
95 | /* | 95 | /* |
96 | * NAME: diMount() | 96 | * NAME: diMount() |
97 | * | 97 | * |
98 | * FUNCTION: initialize the incore inode map control structures for | 98 | * FUNCTION: initialize the incore inode map control structures for |
99 | * a fileset or aggregate init time. | 99 | * a fileset or aggregate init time. |
100 | * | 100 | * |
101 | * the inode map's control structure (dinomap) is | 101 | * the inode map's control structure (dinomap) is |
102 | * brought in from disk and placed in virtual memory. | 102 | * brought in from disk and placed in virtual memory. |
103 | * | 103 | * |
104 | * PARAMETERS: | 104 | * PARAMETERS: |
105 | * ipimap - pointer to inode map inode for the aggregate or fileset. | 105 | * ipimap - pointer to inode map inode for the aggregate or fileset. |
106 | * | 106 | * |
107 | * RETURN VALUES: | 107 | * RETURN VALUES: |
108 | * 0 - success | 108 | * 0 - success |
109 | * -ENOMEM - insufficient free virtual memory. | 109 | * -ENOMEM - insufficient free virtual memory. |
110 | * -EIO - i/o error. | 110 | * -EIO - i/o error. |
111 | */ | 111 | */ |
int diMount(struct inode *ipimap)
{
	struct inomap *imap;
	struct metapage *mp;
	int index;
	struct dinomap_disk *dinom_le;

	/*
	 * allocate/initialize the in-memory inode map control structure
	 */
	/* allocate the in-memory inode map control structure. */
	imap = kmalloc(sizeof(struct inomap), GFP_KERNEL);
	if (imap == NULL) {
		jfs_err("diMount: kmalloc returned NULL!");
		return -ENOMEM;
	}

	/* read the on-disk inode map control structure. */

	mp = read_metapage(ipimap,
			   IMAPBLKNO << JFS_SBI(ipimap->i_sb)->l2nbperpage,
			   PSIZE, 0);
	if (mp == NULL) {
		/* don't leak the incore map on a metapage read failure */
		kfree(imap);
		return -EIO;
	}

	/* copy the on-disk version to the in-memory version,
	 * converting each field from on-disk little-endian to host
	 * byte order.  im_numinos/im_numfree are kept as atomics.
	 */
	dinom_le = (struct dinomap_disk *) mp->data;
	imap->im_freeiag = le32_to_cpu(dinom_le->in_freeiag);
	imap->im_nextiag = le32_to_cpu(dinom_le->in_nextiag);
	atomic_set(&imap->im_numinos, le32_to_cpu(dinom_le->in_numinos));
	atomic_set(&imap->im_numfree, le32_to_cpu(dinom_le->in_numfree));
	imap->im_nbperiext = le32_to_cpu(dinom_le->in_nbperiext);
	imap->im_l2nbperiext = le32_to_cpu(dinom_le->in_l2nbperiext);
	for (index = 0; index < MAXAG; index++) {
		imap->im_agctl[index].inofree =
		    le32_to_cpu(dinom_le->in_agctl[index].inofree);
		imap->im_agctl[index].extfree =
		    le32_to_cpu(dinom_le->in_agctl[index].extfree);
		imap->im_agctl[index].numinos =
		    le32_to_cpu(dinom_le->in_agctl[index].numinos);
		imap->im_agctl[index].numfree =
		    le32_to_cpu(dinom_le->in_agctl[index].numfree);
	}

	/* release the buffer. */
	release_metapage(mp);

	/*
	 * allocate/initialize inode allocation map locks
	 */
	/* allocate and init iag free list lock */
	IAGFREE_LOCK_INIT(imap);

	/* allocate and init ag list locks */
	for (index = 0; index < MAXAG; index++) {
		AG_LOCK_INIT(imap, index);
	}

	/* bind the inode map inode and inode map control structure
	 * to each other.
	 */
	imap->im_ipimap = ipimap;
	JFS_IP(ipimap)->i_imap = imap;

	return (0);
}
180 | 180 | ||
181 | 181 | ||
182 | /* | 182 | /* |
183 | * NAME: diUnmount() | 183 | * NAME: diUnmount() |
184 | * | 184 | * |
185 | * FUNCTION: write to disk the incore inode map control structures for | 185 | * FUNCTION: write to disk the incore inode map control structures for |
186 | * a fileset or aggregate at unmount time. | 186 | * a fileset or aggregate at unmount time. |
187 | * | 187 | * |
188 | * PARAMETERS: | 188 | * PARAMETERS: |
189 | * ipimap - pointer to inode map inode for the aggregate or fileset. | 189 | * ipimap - pointer to inode map inode for the aggregate or fileset. |
190 | * | 190 | * |
191 | * RETURN VALUES: | 191 | * RETURN VALUES: |
192 | * 0 - success | 192 | * 0 - success |
193 | * -ENOMEM - insufficient free virtual memory. | 193 | * -ENOMEM - insufficient free virtual memory. |
194 | * -EIO - i/o error. | 194 | * -EIO - i/o error. |
195 | */ | 195 | */ |
196 | int diUnmount(struct inode *ipimap, int mounterror) | 196 | int diUnmount(struct inode *ipimap, int mounterror) |
197 | { | 197 | { |
198 | struct inomap *imap = JFS_IP(ipimap)->i_imap; | 198 | struct inomap *imap = JFS_IP(ipimap)->i_imap; |
199 | 199 | ||
200 | /* | 200 | /* |
201 | * update the on-disk inode map control structure | 201 | * update the on-disk inode map control structure |
202 | */ | 202 | */ |
203 | 203 | ||
204 | if (!(mounterror || isReadOnly(ipimap))) | 204 | if (!(mounterror || isReadOnly(ipimap))) |
205 | diSync(ipimap); | 205 | diSync(ipimap); |
206 | 206 | ||
207 | /* | 207 | /* |
208 | * Invalidate the page cache buffers | 208 | * Invalidate the page cache buffers |
209 | */ | 209 | */ |
210 | truncate_inode_pages(ipimap->i_mapping, 0); | 210 | truncate_inode_pages(ipimap->i_mapping, 0); |
211 | 211 | ||
212 | /* | 212 | /* |
213 | * free in-memory control structure | 213 | * free in-memory control structure |
214 | */ | 214 | */ |
215 | kfree(imap); | 215 | kfree(imap); |
216 | 216 | ||
217 | return (0); | 217 | return (0); |
218 | } | 218 | } |
219 | 219 | ||
220 | 220 | ||
221 | /* | 221 | /* |
222 | * diSync() | 222 | * diSync() |
223 | */ | 223 | */ |
int diSync(struct inode *ipimap)
{
	struct dinomap_disk *dinom_le;
	struct inomap *imp = JFS_IP(ipimap)->i_imap;
	struct metapage *mp;
	int index;

	/*
	 * write imap global control page
	 */
	/* read the on-disk inode map control structure */
	mp = get_metapage(ipimap,
			  IMAPBLKNO << JFS_SBI(ipimap->i_sb)->l2nbperpage,
			  PSIZE, 0);
	if (mp == NULL) {
		jfs_err("diSync: get_metapage failed!");
		return -EIO;
	}

	/* copy the in-memory version to the on-disk version,
	 * converting each field to on-disk little-endian byte order
	 */
	dinom_le = (struct dinomap_disk *) mp->data;
	dinom_le->in_freeiag = cpu_to_le32(imp->im_freeiag);
	dinom_le->in_nextiag = cpu_to_le32(imp->im_nextiag);
	dinom_le->in_numinos = cpu_to_le32(atomic_read(&imp->im_numinos));
	dinom_le->in_numfree = cpu_to_le32(atomic_read(&imp->im_numfree));
	dinom_le->in_nbperiext = cpu_to_le32(imp->im_nbperiext);
	dinom_le->in_l2nbperiext = cpu_to_le32(imp->im_l2nbperiext);
	for (index = 0; index < MAXAG; index++) {
		dinom_le->in_agctl[index].inofree =
		    cpu_to_le32(imp->im_agctl[index].inofree);
		dinom_le->in_agctl[index].extfree =
		    cpu_to_le32(imp->im_agctl[index].extfree);
		dinom_le->in_agctl[index].numinos =
		    cpu_to_le32(imp->im_agctl[index].numinos);
		dinom_le->in_agctl[index].numfree =
		    cpu_to_le32(imp->im_agctl[index].numfree);
	}

	/* write out the control structure */
	write_metapage(mp);

	/*
	 * write out dirty pages of imap
	 */
	filemap_write_and_wait(ipimap->i_mapping);

	/* flush the inode map inode itself to its fixed disk location */
	diWriteSpecial(ipimap, 0);

	return (0);
}
274 | 274 | ||
275 | 275 | ||
276 | /* | 276 | /* |
277 | * NAME: diRead() | 277 | * NAME: diRead() |
278 | * | 278 | * |
279 | * FUNCTION: initialize an incore inode from disk. | 279 | * FUNCTION: initialize an incore inode from disk. |
280 | * | 280 | * |
281 | * on entry, the specified incore inode should itself | 281 | * on entry, the specified incore inode should itself |
282 | * specify the disk inode number corresponding to the | 282 | * specify the disk inode number corresponding to the |
283 | * incore inode (i.e. i_number should be initialized). | 283 | * incore inode (i.e. i_number should be initialized). |
284 | * | 284 | * |
285 | * this routine handles incore inode initialization for | 285 | * this routine handles incore inode initialization for |
286 | * both "special" and "regular" inodes. special inodes | 286 | * both "special" and "regular" inodes. special inodes |
287 | * are those required early in the mount process and | 287 | * are those required early in the mount process and |
288 | * require special handling since much of the file system | 288 | * require special handling since much of the file system |
289 | * is not yet initialized. these "special" inodes are | 289 | * is not yet initialized. these "special" inodes are |
290 | * identified by a NULL inode map inode pointer and are | 290 | * identified by a NULL inode map inode pointer and are |
291 | * actually initialized by a call to diReadSpecial(). | 291 | * actually initialized by a call to diReadSpecial(). |
292 | * | 292 | * |
293 | * for regular inodes, the iag describing the disk inode | 293 | * for regular inodes, the iag describing the disk inode |
294 | * is read from disk to determine the inode extent address | 294 | * is read from disk to determine the inode extent address |
295 | * for the disk inode. with the inode extent address in | 295 | * for the disk inode. with the inode extent address in |
296 | * hand, the page of the extent that contains the disk | 296 | * hand, the page of the extent that contains the disk |
297 | * inode is read and the disk inode is copied to the | 297 | * inode is read and the disk inode is copied to the |
298 | * incore inode. | 298 | * incore inode. |
299 | * | 299 | * |
300 | * PARAMETERS: | 300 | * PARAMETERS: |
301 | * ip - pointer to incore inode to be initialized from disk. | 301 | * ip - pointer to incore inode to be initialized from disk. |
302 | * | 302 | * |
303 | * RETURN VALUES: | 303 | * RETURN VALUES: |
304 | * 0 - success | 304 | * 0 - success |
305 | * -EIO - i/o error. | 305 | * -EIO - i/o error. |
306 | * -ENOMEM - insufficient memory | 306 | * -ENOMEM - insufficient memory |
307 | * | 307 | * |
308 | */ | 308 | */ |
int diRead(struct inode *ip)
{
	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
	int iagno, ino, extno, rc;
	struct inode *ipimap;
	struct dinode *dp;
	struct iag *iagp;
	struct metapage *mp;
	s64 blkno, agstart;
	struct inomap *imap;
	int block_offset;
	int inodes_left;
	unsigned long pageno;
	int rel_inode;

	jfs_info("diRead: ino = %ld", ip->i_ino);

	ipimap = sbi->ipimap;
	JFS_IP(ip)->ipimap = ipimap;

	/* determine the iag number for this inode (number) */
	iagno = INOTOIAG(ip->i_ino);

	/* read the iag; the read lock on the map inode is held only
	 * for the duration of the iag read
	 */
	imap = JFS_IP(ipimap)->i_imap;
	IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
	rc = diIAGRead(imap, iagno, &mp);
	IREAD_UNLOCK(ipimap);
	if (rc) {
		jfs_err("diRead: diIAGRead returned %d", rc);
		return (rc);
	}

	iagp = (struct iag *) mp->data;

	/* determine inode extent that holds the disk inode */
	ino = ip->i_ino & (INOSPERIAG - 1);
	extno = ino >> L2INOSPEREXT;

	/* an extent of the wrong length or with no address means the
	 * on-disk inode no longer exists: report a stale handle
	 */
	if ((lengthPXD(&iagp->inoext[extno]) != imap->im_nbperiext) ||
	    (addressPXD(&iagp->inoext[extno]) == 0)) {
		release_metapage(mp);
		return -ESTALE;
	}

	/* get disk block number of the page within the inode extent
	 * that holds the disk inode.
	 */
	blkno = INOPBLK(&iagp->inoext[extno], ino, sbi->l2nbperpage);

	/* get the ag for the iag */
	agstart = le64_to_cpu(iagp->agstart);

	release_metapage(mp);

	rel_inode = (ino & (INOSPERPAGE - 1));
	pageno = blkno >> sbi->l2nbperpage;

	if ((block_offset = ((u32) blkno & (sbi->nbperpage - 1)))) {
		/*
		 * OS/2 didn't always align inode extents on page boundaries
		 */
		inodes_left =
		     (sbi->nbperpage - block_offset) << sbi->l2niperblk;

		/* adjust the relative inode for the unaligned start,
		 * spilling into the next page when it falls past the
		 * inodes remaining in this one
		 */
		if (rel_inode < inodes_left)
			rel_inode += block_offset << sbi->l2niperblk;
		else {
			pageno += 1;
			rel_inode -= inodes_left;
		}
	}

	/* read the page of disk inode */
	mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1);
	if (!mp) {
		jfs_err("diRead: read_metapage failed");
		return -EIO;
	}

	/* locate the disk inode requested */
	dp = (struct dinode *) mp->data;
	dp += rel_inode;

	/* sanity-check the on-disk inode before trusting its contents */
	if (ip->i_ino != le32_to_cpu(dp->di_number)) {
		jfs_error(ip->i_sb, "diRead: i_ino != di_number");
		rc = -EIO;
	} else if (le32_to_cpu(dp->di_nlink) == 0)
		rc = -ESTALE;
	else
		/* copy the disk inode to the in-memory inode */
		rc = copy_from_dinode(dp, ip);

	release_metapage(mp);

	/* set the ag for the inode */
	JFS_IP(ip)->agno = BLKTOAG(agstart, sbi);
	JFS_IP(ip)->active_ag = -1;

	return (rc);
}
410 | 410 | ||
411 | 411 | ||
412 | /* | 412 | /* |
413 | * NAME: diReadSpecial() | 413 | * NAME: diReadSpecial() |
414 | * | 414 | * |
415 | * FUNCTION: initialize a 'special' inode from disk. | 415 | * FUNCTION: initialize a 'special' inode from disk. |
416 | * | 416 | * |
417 | * this routine handles aggregate level inodes. The | 417 | * this routine handles aggregate level inodes. The |
418 | * inode cache cannot differentiate between the | 418 | * inode cache cannot differentiate between the |
419 | * aggregate inodes and the filesystem inodes, so we | 419 | * aggregate inodes and the filesystem inodes, so we |
420 | * handle these here. We don't actually use the aggregate | 420 | * handle these here. We don't actually use the aggregate |
421 | * inode map, since these inodes are at a fixed location | 421 | * inode map, since these inodes are at a fixed location |
422 | * and in some cases the aggregate inode map isn't initialized | 422 | * and in some cases the aggregate inode map isn't initialized |
423 | * yet. | 423 | * yet. |
424 | * | 424 | * |
425 | * PARAMETERS: | 425 | * PARAMETERS: |
426 | * sb - filesystem superblock | 426 | * sb - filesystem superblock |
427 | * inum - aggregate inode number | 427 | * inum - aggregate inode number |
428 | * secondary - 1 if secondary aggregate inode table | 428 | * secondary - 1 if secondary aggregate inode table |
429 | * | 429 | * |
430 | * RETURN VALUES: | 430 | * RETURN VALUES: |
431 | * new inode - success | 431 | * new inode - success |
432 | * NULL - i/o error. | 432 | * NULL - i/o error. |
433 | */ | 433 | */ |
struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
{
	struct jfs_sb_info *sbi = JFS_SBI(sb);
	uint address;
	struct dinode *dp;
	struct inode *ip;
	struct metapage *mp;

	ip = new_inode(sb);
	if (ip == NULL) {
		jfs_err("diReadSpecial: new_inode returned NULL!");
		return ip;
	}

	/* pick the fixed on-disk location of the requested aggregate
	 * inode table: secondary table address comes from the ait2
	 * descriptor, primary lives at the fixed AITBL_OFF offset
	 */
	if (secondary) {
		address = addressPXD(&sbi->ait2) >> sbi->l2nbperpage;
		JFS_IP(ip)->ipimap = sbi->ipaimap2;
	} else {
		address = AITBL_OFF >> L2PSIZE;
		JFS_IP(ip)->ipimap = sbi->ipaimap;
	}

	ASSERT(inum < INOSPEREXT);

	ip->i_ino = inum;

	address += inum >> 3;	/* 8 inodes per 4K page */

	/* read the page of fixed disk inode (AIT) in raw mode */
	mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1);
	if (mp == NULL) {
		ip->i_nlink = 1;	/* Don't want iput() deleting it */
		iput(ip);
		return (NULL);
	}

	/* get the pointer to the disk inode of interest */
	dp = (struct dinode *) (mp->data);
	dp += inum % 8;	/* 8 inodes per 4K page */

	/* copy on-disk inode to in-memory inode */
	if ((copy_from_dinode(dp, ip)) != 0) {
		/* handle bad return by returning NULL for ip */
		ip->i_nlink = 1;	/* Don't want iput() deleting it */
		iput(ip);
		/* release the page */
		release_metapage(mp);
		return (NULL);

	}

	ip->i_mapping->a_ops = &jfs_metapage_aops;
	mapping_set_gfp_mask(ip->i_mapping, GFP_NOFS);

	/* Allocations to metadata inodes should not affect quotas */
	ip->i_flags |= S_NOQUOTA;

	/* the primary fileset inode carries aggregate-wide generation
	 * state; cache it in the superblock info
	 */
	if ((inum == FILESYSTEM_I) && (JFS_IP(ip)->ipimap == sbi->ipaimap)) {
		sbi->gengen = le32_to_cpu(dp->di_gengen);
		sbi->inostamp = le32_to_cpu(dp->di_inostamp);
	}

	/* release the page */
	release_metapage(mp);

	/* special inodes are kept on their own hash list, since the
	 * inode cache cannot tell them apart from fileset inodes
	 */
	hlist_add_head(&ip->i_hash, &aggregate_hash);

	return (ip);
}
503 | 503 | ||
504 | /* | 504 | /* |
505 | * NAME: diWriteSpecial() | 505 | * NAME: diWriteSpecial() |
506 | * | 506 | * |
507 | * FUNCTION: Write the special inode to disk | 507 | * FUNCTION: Write the special inode to disk |
508 | * | 508 | * |
509 | * PARAMETERS: | 509 | * PARAMETERS: |
510 | * ip - special inode | 510 | * ip - special inode |
511 | * secondary - 1 if secondary aggregate inode table | 511 | * secondary - 1 if secondary aggregate inode table |
512 | * | 512 | * |
513 | * RETURN VALUES: none | 513 | * RETURN VALUES: none |
514 | */ | 514 | */ |
515 | 515 | ||
void diWriteSpecial(struct inode *ip, int secondary)
{
	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
	uint address;
	struct dinode *dp;
	ino_t inum = ip->i_ino;
	struct metapage *mp;

	/* locate the fixed on-disk aggregate inode table holding
	 * this special inode (see diReadSpecial)
	 */
	if (secondary)
		address = addressPXD(&sbi->ait2) >> sbi->l2nbperpage;
	else
		address = AITBL_OFF >> L2PSIZE;

	ASSERT(inum < INOSPEREXT);

	address += inum >> 3;	/* 8 inodes per 4K page */

	/* read the page of fixed disk inode (AIT) in raw mode */
	mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1);
	if (mp == NULL) {
		jfs_err("diWriteSpecial: failed to read aggregate inode "
			"extent!");
		return;
	}

	/* get the pointer to the disk inode of interest */
	dp = (struct dinode *) (mp->data);
	dp += inum % 8;	/* 8 inodes per 4K page */

	/* copy in-memory inode to on-disk inode */
	copy_to_dinode(dp, ip);
	/* 288 = size of the inline xtree root area in the on-disk
	 * inode (di_xtroot) — TODO confirm against struct dinode layout
	 */
	memcpy(&dp->di_xtroot, &JFS_IP(ip)->i_xtroot, 288);

	if (inum == FILESYSTEM_I)
		dp->di_gengen = cpu_to_le32(sbi->gengen);

	/* write the page */
	write_metapage(mp);
}
555 | 555 | ||
556 | /* | 556 | /* |
557 | * NAME: diFreeSpecial() | 557 | * NAME: diFreeSpecial() |
558 | * | 558 | * |
559 | * FUNCTION: Free allocated space for special inode | 559 | * FUNCTION: Free allocated space for special inode |
560 | */ | 560 | */ |
561 | void diFreeSpecial(struct inode *ip) | 561 | void diFreeSpecial(struct inode *ip) |
562 | { | 562 | { |
563 | if (ip == NULL) { | 563 | if (ip == NULL) { |
564 | jfs_err("diFreeSpecial called with NULL ip!"); | 564 | jfs_err("diFreeSpecial called with NULL ip!"); |
565 | return; | 565 | return; |
566 | } | 566 | } |
567 | filemap_write_and_wait(ip->i_mapping); | 567 | filemap_write_and_wait(ip->i_mapping); |
568 | truncate_inode_pages(ip->i_mapping, 0); | 568 | truncate_inode_pages(ip->i_mapping, 0); |
569 | iput(ip); | 569 | iput(ip); |
570 | } | 570 | } |
571 | 571 | ||
572 | 572 | ||
573 | 573 | ||
574 | /* | 574 | /* |
575 | * NAME: diWrite() | 575 | * NAME: diWrite() |
576 | * | 576 | * |
577 | * FUNCTION: write the on-disk inode portion of the in-memory inode | 577 | * FUNCTION: write the on-disk inode portion of the in-memory inode |
578 | * to its corresponding on-disk inode. | 578 | * to its corresponding on-disk inode. |
579 | * | 579 | * |
580 | * on entry, the specified incore inode should itself | 580 | * on entry, the specified incore inode should itself |
581 | * specify the disk inode number corresponding to the | 581 | * specify the disk inode number corresponding to the |
582 | * incore inode (i.e. i_number should be initialized). | 582 | * incore inode (i.e. i_number should be initialized). |
583 | * | 583 | * |
584 | * the inode contains the inode extent address for the disk | 584 | * the inode contains the inode extent address for the disk |
585 | * inode. with the inode extent address in hand, the | 585 | * inode. with the inode extent address in hand, the |
586 | * page of the extent that contains the disk inode is | 586 | * page of the extent that contains the disk inode is |
587 | * read and the disk inode portion of the incore inode | 587 | * read and the disk inode portion of the incore inode |
588 | * is copied to the disk inode. | 588 | * is copied to the disk inode. |
589 | * | 589 | * |
590 | * PARAMETERS: | 590 | * PARAMETERS: |
591 | * tid - transaction id | 591 | * tid - transaction id |
592 | * ip - pointer to incore inode to be written to the inode extent. | 592 | * ip - pointer to incore inode to be written to the inode extent. |
593 | * | 593 | * |
594 | * RETURN VALUES: | 594 | * RETURN VALUES: |
595 | * 0 - success | 595 | * 0 - success |
596 | * -EIO - i/o error. | 596 | * -EIO - i/o error. |
597 | */ | 597 | */ |
598 | int diWrite(tid_t tid, struct inode *ip) | 598 | int diWrite(tid_t tid, struct inode *ip) |
599 | { | 599 | { |
600 | struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); | 600 | struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); |
601 | struct jfs_inode_info *jfs_ip = JFS_IP(ip); | 601 | struct jfs_inode_info *jfs_ip = JFS_IP(ip); |
602 | int rc = 0; | 602 | int rc = 0; |
603 | s32 ino; | 603 | s32 ino; |
604 | struct dinode *dp; | 604 | struct dinode *dp; |
605 | s64 blkno; | 605 | s64 blkno; |
606 | int block_offset; | 606 | int block_offset; |
607 | int inodes_left; | 607 | int inodes_left; |
608 | struct metapage *mp; | 608 | struct metapage *mp; |
609 | unsigned long pageno; | 609 | unsigned long pageno; |
610 | int rel_inode; | 610 | int rel_inode; |
611 | int dioffset; | 611 | int dioffset; |
612 | struct inode *ipimap; | 612 | struct inode *ipimap; |
613 | uint type; | 613 | uint type; |
614 | lid_t lid; | 614 | lid_t lid; |
615 | struct tlock *ditlck, *tlck; | 615 | struct tlock *ditlck, *tlck; |
616 | struct linelock *dilinelock, *ilinelock; | 616 | struct linelock *dilinelock, *ilinelock; |
617 | struct lv *lv; | 617 | struct lv *lv; |
618 | int n; | 618 | int n; |
619 | 619 | ||
620 | ipimap = jfs_ip->ipimap; | 620 | ipimap = jfs_ip->ipimap; |
621 | 621 | ||
622 | ino = ip->i_ino & (INOSPERIAG - 1); | 622 | ino = ip->i_ino & (INOSPERIAG - 1); |
623 | 623 | ||
624 | if (!addressPXD(&(jfs_ip->ixpxd)) || | 624 | if (!addressPXD(&(jfs_ip->ixpxd)) || |
625 | (lengthPXD(&(jfs_ip->ixpxd)) != | 625 | (lengthPXD(&(jfs_ip->ixpxd)) != |
626 | JFS_IP(ipimap)->i_imap->im_nbperiext)) { | 626 | JFS_IP(ipimap)->i_imap->im_nbperiext)) { |
627 | jfs_error(ip->i_sb, "diWrite: ixpxd invalid"); | 627 | jfs_error(ip->i_sb, "diWrite: ixpxd invalid"); |
628 | return -EIO; | 628 | return -EIO; |
629 | } | 629 | } |
630 | 630 | ||
631 | /* | 631 | /* |
632 | * read the page of disk inode containing the specified inode: | 632 | * read the page of disk inode containing the specified inode: |
633 | */ | 633 | */ |
634 | /* compute the block address of the page */ | 634 | /* compute the block address of the page */ |
635 | blkno = INOPBLK(&(jfs_ip->ixpxd), ino, sbi->l2nbperpage); | 635 | blkno = INOPBLK(&(jfs_ip->ixpxd), ino, sbi->l2nbperpage); |
636 | 636 | ||
637 | rel_inode = (ino & (INOSPERPAGE - 1)); | 637 | rel_inode = (ino & (INOSPERPAGE - 1)); |
638 | pageno = blkno >> sbi->l2nbperpage; | 638 | pageno = blkno >> sbi->l2nbperpage; |
639 | 639 | ||
640 | if ((block_offset = ((u32) blkno & (sbi->nbperpage - 1)))) { | 640 | if ((block_offset = ((u32) blkno & (sbi->nbperpage - 1)))) { |
641 | /* | 641 | /* |
642 | * OS/2 didn't always align inode extents on page boundaries | 642 | * OS/2 didn't always align inode extents on page boundaries |
643 | */ | 643 | */ |
644 | inodes_left = | 644 | inodes_left = |
645 | (sbi->nbperpage - block_offset) << sbi->l2niperblk; | 645 | (sbi->nbperpage - block_offset) << sbi->l2niperblk; |
646 | 646 | ||
647 | if (rel_inode < inodes_left) | 647 | if (rel_inode < inodes_left) |
648 | rel_inode += block_offset << sbi->l2niperblk; | 648 | rel_inode += block_offset << sbi->l2niperblk; |
649 | else { | 649 | else { |
650 | pageno += 1; | 650 | pageno += 1; |
651 | rel_inode -= inodes_left; | 651 | rel_inode -= inodes_left; |
652 | } | 652 | } |
653 | } | 653 | } |
654 | /* read the page of disk inode */ | 654 | /* read the page of disk inode */ |
655 | retry: | 655 | retry: |
656 | mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1); | 656 | mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1); |
657 | if (!mp) | 657 | if (!mp) |
658 | return -EIO; | 658 | return -EIO; |
659 | 659 | ||
660 | /* get the pointer to the disk inode */ | 660 | /* get the pointer to the disk inode */ |
661 | dp = (struct dinode *) mp->data; | 661 | dp = (struct dinode *) mp->data; |
662 | dp += rel_inode; | 662 | dp += rel_inode; |
663 | 663 | ||
664 | dioffset = (ino & (INOSPERPAGE - 1)) << L2DISIZE; | 664 | dioffset = (ino & (INOSPERPAGE - 1)) << L2DISIZE; |
665 | 665 | ||
666 | /* | 666 | /* |
667 | * acquire transaction lock on the on-disk inode; | 667 | * acquire transaction lock on the on-disk inode; |
668 | * N.B. tlock is acquired on ipimap not ip; | 668 | * N.B. tlock is acquired on ipimap not ip; |
669 | */ | 669 | */ |
670 | if ((ditlck = | 670 | if ((ditlck = |
671 | txLock(tid, ipimap, mp, tlckINODE | tlckENTRY)) == NULL) | 671 | txLock(tid, ipimap, mp, tlckINODE | tlckENTRY)) == NULL) |
672 | goto retry; | 672 | goto retry; |
673 | dilinelock = (struct linelock *) & ditlck->lock; | 673 | dilinelock = (struct linelock *) & ditlck->lock; |
674 | 674 | ||
675 | /* | 675 | /* |
676 | * copy btree root from in-memory inode to on-disk inode | 676 | * copy btree root from in-memory inode to on-disk inode |
677 | * | 677 | * |
678 | * (tlock is taken from inline B+-tree root in in-memory | 678 | * (tlock is taken from inline B+-tree root in in-memory |
679 | * inode when the B+-tree root is updated, which is pointed | 679 | * inode when the B+-tree root is updated, which is pointed |
680 | * by jfs_ip->blid as well as being on tx tlock list) | 680 | * by jfs_ip->blid as well as being on tx tlock list) |
681 | * | 681 | * |
682 | * further processing of btree root is based on the copy | 682 | * further processing of btree root is based on the copy |
683 | * in in-memory inode, where txLog() will log from, and, | 683 | * in in-memory inode, where txLog() will log from, and, |
684 | * for xtree root, txUpdateMap() will update map and reset | 684 | * for xtree root, txUpdateMap() will update map and reset |
685 | * XAD_NEW bit; | 685 | * XAD_NEW bit; |
686 | */ | 686 | */ |
687 | 687 | ||
688 | if (S_ISDIR(ip->i_mode) && (lid = jfs_ip->xtlid)) { | 688 | if (S_ISDIR(ip->i_mode) && (lid = jfs_ip->xtlid)) { |
689 | /* | 689 | /* |
690 | * This is the special xtree inside the directory for storing | 690 | * This is the special xtree inside the directory for storing |
691 | * the directory table | 691 | * the directory table |
692 | */ | 692 | */ |
693 | xtpage_t *p, *xp; | 693 | xtpage_t *p, *xp; |
694 | xad_t *xad; | 694 | xad_t *xad; |
695 | 695 | ||
696 | jfs_ip->xtlid = 0; | 696 | jfs_ip->xtlid = 0; |
697 | tlck = lid_to_tlock(lid); | 697 | tlck = lid_to_tlock(lid); |
698 | assert(tlck->type & tlckXTREE); | 698 | assert(tlck->type & tlckXTREE); |
699 | tlck->type |= tlckBTROOT; | 699 | tlck->type |= tlckBTROOT; |
700 | tlck->mp = mp; | 700 | tlck->mp = mp; |
701 | ilinelock = (struct linelock *) & tlck->lock; | 701 | ilinelock = (struct linelock *) & tlck->lock; |
702 | 702 | ||
703 | /* | 703 | /* |
704 | * copy xtree root from inode to dinode: | 704 | * copy xtree root from inode to dinode: |
705 | */ | 705 | */ |
706 | p = &jfs_ip->i_xtroot; | 706 | p = &jfs_ip->i_xtroot; |
707 | xp = (xtpage_t *) &dp->di_dirtable; | 707 | xp = (xtpage_t *) &dp->di_dirtable; |
708 | lv = ilinelock->lv; | 708 | lv = ilinelock->lv; |
709 | for (n = 0; n < ilinelock->index; n++, lv++) { | 709 | for (n = 0; n < ilinelock->index; n++, lv++) { |
710 | memcpy(&xp->xad[lv->offset], &p->xad[lv->offset], | 710 | memcpy(&xp->xad[lv->offset], &p->xad[lv->offset], |
711 | lv->length << L2XTSLOTSIZE); | 711 | lv->length << L2XTSLOTSIZE); |
712 | } | 712 | } |
713 | 713 | ||
714 | /* reset on-disk (metadata page) xtree XAD_NEW bit */ | 714 | /* reset on-disk (metadata page) xtree XAD_NEW bit */ |
715 | xad = &xp->xad[XTENTRYSTART]; | 715 | xad = &xp->xad[XTENTRYSTART]; |
716 | for (n = XTENTRYSTART; | 716 | for (n = XTENTRYSTART; |
717 | n < le16_to_cpu(xp->header.nextindex); n++, xad++) | 717 | n < le16_to_cpu(xp->header.nextindex); n++, xad++) |
718 | if (xad->flag & (XAD_NEW | XAD_EXTENDED)) | 718 | if (xad->flag & (XAD_NEW | XAD_EXTENDED)) |
719 | xad->flag &= ~(XAD_NEW | XAD_EXTENDED); | 719 | xad->flag &= ~(XAD_NEW | XAD_EXTENDED); |
720 | } | 720 | } |
721 | 721 | ||
722 | if ((lid = jfs_ip->blid) == 0) | 722 | if ((lid = jfs_ip->blid) == 0) |
723 | goto inlineData; | 723 | goto inlineData; |
724 | jfs_ip->blid = 0; | 724 | jfs_ip->blid = 0; |
725 | 725 | ||
726 | tlck = lid_to_tlock(lid); | 726 | tlck = lid_to_tlock(lid); |
727 | type = tlck->type; | 727 | type = tlck->type; |
728 | tlck->type |= tlckBTROOT; | 728 | tlck->type |= tlckBTROOT; |
729 | tlck->mp = mp; | 729 | tlck->mp = mp; |
730 | ilinelock = (struct linelock *) & tlck->lock; | 730 | ilinelock = (struct linelock *) & tlck->lock; |
731 | 731 | ||
732 | /* | 732 | /* |
733 | * regular file: 16 byte (XAD slot) granularity | 733 | * regular file: 16 byte (XAD slot) granularity |
734 | */ | 734 | */ |
735 | if (type & tlckXTREE) { | 735 | if (type & tlckXTREE) { |
736 | xtpage_t *p, *xp; | 736 | xtpage_t *p, *xp; |
737 | xad_t *xad; | 737 | xad_t *xad; |
738 | 738 | ||
739 | /* | 739 | /* |
740 | * copy xtree root from inode to dinode: | 740 | * copy xtree root from inode to dinode: |
741 | */ | 741 | */ |
742 | p = &jfs_ip->i_xtroot; | 742 | p = &jfs_ip->i_xtroot; |
743 | xp = &dp->di_xtroot; | 743 | xp = &dp->di_xtroot; |
744 | lv = ilinelock->lv; | 744 | lv = ilinelock->lv; |
745 | for (n = 0; n < ilinelock->index; n++, lv++) { | 745 | for (n = 0; n < ilinelock->index; n++, lv++) { |
746 | memcpy(&xp->xad[lv->offset], &p->xad[lv->offset], | 746 | memcpy(&xp->xad[lv->offset], &p->xad[lv->offset], |
747 | lv->length << L2XTSLOTSIZE); | 747 | lv->length << L2XTSLOTSIZE); |
748 | } | 748 | } |
749 | 749 | ||
750 | /* reset on-disk (metadata page) xtree XAD_NEW bit */ | 750 | /* reset on-disk (metadata page) xtree XAD_NEW bit */ |
751 | xad = &xp->xad[XTENTRYSTART]; | 751 | xad = &xp->xad[XTENTRYSTART]; |
752 | for (n = XTENTRYSTART; | 752 | for (n = XTENTRYSTART; |
753 | n < le16_to_cpu(xp->header.nextindex); n++, xad++) | 753 | n < le16_to_cpu(xp->header.nextindex); n++, xad++) |
754 | if (xad->flag & (XAD_NEW | XAD_EXTENDED)) | 754 | if (xad->flag & (XAD_NEW | XAD_EXTENDED)) |
755 | xad->flag &= ~(XAD_NEW | XAD_EXTENDED); | 755 | xad->flag &= ~(XAD_NEW | XAD_EXTENDED); |
756 | } | 756 | } |
757 | /* | 757 | /* |
758 | * directory: 32 byte (directory entry slot) granularity | 758 | * directory: 32 byte (directory entry slot) granularity |
759 | */ | 759 | */ |
760 | else if (type & tlckDTREE) { | 760 | else if (type & tlckDTREE) { |
761 | dtpage_t *p, *xp; | 761 | dtpage_t *p, *xp; |
762 | 762 | ||
763 | /* | 763 | /* |
764 | * copy dtree root from inode to dinode: | 764 | * copy dtree root from inode to dinode: |
765 | */ | 765 | */ |
766 | p = (dtpage_t *) &jfs_ip->i_dtroot; | 766 | p = (dtpage_t *) &jfs_ip->i_dtroot; |
767 | xp = (dtpage_t *) & dp->di_dtroot; | 767 | xp = (dtpage_t *) & dp->di_dtroot; |
768 | lv = ilinelock->lv; | 768 | lv = ilinelock->lv; |
769 | for (n = 0; n < ilinelock->index; n++, lv++) { | 769 | for (n = 0; n < ilinelock->index; n++, lv++) { |
770 | memcpy(&xp->slot[lv->offset], &p->slot[lv->offset], | 770 | memcpy(&xp->slot[lv->offset], &p->slot[lv->offset], |
771 | lv->length << L2DTSLOTSIZE); | 771 | lv->length << L2DTSLOTSIZE); |
772 | } | 772 | } |
773 | } else { | 773 | } else { |
774 | jfs_err("diWrite: UFO tlock"); | 774 | jfs_err("diWrite: UFO tlock"); |
775 | } | 775 | } |
776 | 776 | ||
777 | inlineData: | 777 | inlineData: |
778 | /* | 778 | /* |
779 | * copy inline symlink from in-memory inode to on-disk inode | 779 | * copy inline symlink from in-memory inode to on-disk inode |
780 | */ | 780 | */ |
781 | if (S_ISLNK(ip->i_mode) && ip->i_size < IDATASIZE) { | 781 | if (S_ISLNK(ip->i_mode) && ip->i_size < IDATASIZE) { |
782 | lv = & dilinelock->lv[dilinelock->index]; | 782 | lv = & dilinelock->lv[dilinelock->index]; |
783 | lv->offset = (dioffset + 2 * 128) >> L2INODESLOTSIZE; | 783 | lv->offset = (dioffset + 2 * 128) >> L2INODESLOTSIZE; |
784 | lv->length = 2; | 784 | lv->length = 2; |
785 | memcpy(&dp->di_fastsymlink, jfs_ip->i_inline, IDATASIZE); | 785 | memcpy(&dp->di_fastsymlink, jfs_ip->i_inline, IDATASIZE); |
786 | dilinelock->index++; | 786 | dilinelock->index++; |
787 | } | 787 | } |
788 | /* | 788 | /* |
789 | * copy inline data from in-memory inode to on-disk inode: | 789 | * copy inline data from in-memory inode to on-disk inode: |
790 | * 128 byte slot granularity | 790 | * 128 byte slot granularity |
791 | */ | 791 | */ |
792 | if (test_cflag(COMMIT_Inlineea, ip)) { | 792 | if (test_cflag(COMMIT_Inlineea, ip)) { |
793 | lv = & dilinelock->lv[dilinelock->index]; | 793 | lv = & dilinelock->lv[dilinelock->index]; |
794 | lv->offset = (dioffset + 3 * 128) >> L2INODESLOTSIZE; | 794 | lv->offset = (dioffset + 3 * 128) >> L2INODESLOTSIZE; |
795 | lv->length = 1; | 795 | lv->length = 1; |
796 | memcpy(&dp->di_inlineea, jfs_ip->i_inline_ea, INODESLOTSIZE); | 796 | memcpy(&dp->di_inlineea, jfs_ip->i_inline_ea, INODESLOTSIZE); |
797 | dilinelock->index++; | 797 | dilinelock->index++; |
798 | 798 | ||
799 | clear_cflag(COMMIT_Inlineea, ip); | 799 | clear_cflag(COMMIT_Inlineea, ip); |
800 | } | 800 | } |
801 | 801 | ||
802 | /* | 802 | /* |
803 | * lock/copy inode base: 128 byte slot granularity | 803 | * lock/copy inode base: 128 byte slot granularity |
804 | */ | 804 | */ |
805 | lv = & dilinelock->lv[dilinelock->index]; | 805 | lv = & dilinelock->lv[dilinelock->index]; |
806 | lv->offset = dioffset >> L2INODESLOTSIZE; | 806 | lv->offset = dioffset >> L2INODESLOTSIZE; |
807 | copy_to_dinode(dp, ip); | 807 | copy_to_dinode(dp, ip); |
808 | if (test_and_clear_cflag(COMMIT_Dirtable, ip)) { | 808 | if (test_and_clear_cflag(COMMIT_Dirtable, ip)) { |
809 | lv->length = 2; | 809 | lv->length = 2; |
810 | memcpy(&dp->di_dirtable, &jfs_ip->i_dirtable, 96); | 810 | memcpy(&dp->di_dirtable, &jfs_ip->i_dirtable, 96); |
811 | } else | 811 | } else |
812 | lv->length = 1; | 812 | lv->length = 1; |
813 | dilinelock->index++; | 813 | dilinelock->index++; |
814 | 814 | ||
815 | /* release the buffer holding the updated on-disk inode. | 815 | /* release the buffer holding the updated on-disk inode. |
816 | * the buffer will be later written by commit processing. | 816 | * the buffer will be later written by commit processing. |
817 | */ | 817 | */ |
818 | write_metapage(mp); | 818 | write_metapage(mp); |
819 | 819 | ||
820 | return (rc); | 820 | return (rc); |
821 | } | 821 | } |
822 | 822 | ||
823 | 823 | ||
824 | /* | 824 | /* |
825 | * NAME: diFree(ip) | 825 | * NAME: diFree(ip) |
826 | * | 826 | * |
827 | * FUNCTION: free a specified inode from the inode working map | 827 | * FUNCTION: free a specified inode from the inode working map |
828 | * for a fileset or aggregate. | 828 | * for a fileset or aggregate. |
829 | * | 829 | * |
830 | * if the inode to be freed represents the first (only) | 830 | * if the inode to be freed represents the first (only) |
831 | * free inode within the iag, the iag will be placed on | 831 | * free inode within the iag, the iag will be placed on |
832 | * the ag free inode list. | 832 | * the ag free inode list. |
833 | * | 833 | * |
834 | * freeing the inode will cause the inode extent to be | 834 | * freeing the inode will cause the inode extent to be |
835 | * freed if the inode is the only allocated inode within | 835 | * freed if the inode is the only allocated inode within |
836 | * the extent. in this case all the disk resource backing | 836 | * the extent. in this case all the disk resource backing |
837 | * up the inode extent will be freed. in addition, the iag | 837 | * up the inode extent will be freed. in addition, the iag |
838 | * will be placed on the ag extent free list if the extent | 838 | * will be placed on the ag extent free list if the extent |
839 | * is the first free extent in the iag. if freeing the | 839 | * is the first free extent in the iag. if freeing the |
840 | * extent also means that no free inodes will exist for | 840 | * extent also means that no free inodes will exist for |
841 | * the iag, the iag will also be removed from the ag free | 841 | * the iag, the iag will also be removed from the ag free |
842 | * inode list. | 842 | * inode list. |
843 | * | 843 | * |
844 | * the iag describing the inode will be freed if the extent | 844 | * the iag describing the inode will be freed if the extent |
845 | * is to be freed and it is the only backed extent within | 845 | * is to be freed and it is the only backed extent within |
846 | * the iag. in this case, the iag will be removed from the | 846 | * the iag. in this case, the iag will be removed from the |
847 | * ag free extent list and ag free inode list and placed on | 847 | * ag free extent list and ag free inode list and placed on |
848 | * the inode map's free iag list. | 848 | * the inode map's free iag list. |
849 | * | 849 | * |
850 | * a careful update approach is used to provide consistency | 850 | * a careful update approach is used to provide consistency |
851 | * in the face of updates to multiple buffers. under this | 851 | * in the face of updates to multiple buffers. under this |
852 | * approach, all required buffers are obtained before making | 852 | * approach, all required buffers are obtained before making |
853 | * any updates and are held until all updates are complete. | 853 | * any updates and are held until all updates are complete. |
854 | * | 854 | * |
855 | * PARAMETERS: | 855 | * PARAMETERS: |
856 | * ip - inode to be freed. | 856 | * ip - inode to be freed. |
857 | * | 857 | * |
858 | * RETURN VALUES: | 858 | * RETURN VALUES: |
859 | * 0 - success | 859 | * 0 - success |
860 | * -EIO - i/o error. | 860 | * -EIO - i/o error. |
861 | */ | 861 | */ |
862 | int diFree(struct inode *ip) | 862 | int diFree(struct inode *ip) |
863 | { | 863 | { |
864 | int rc; | 864 | int rc; |
865 | ino_t inum = ip->i_ino; | 865 | ino_t inum = ip->i_ino; |
866 | struct iag *iagp, *aiagp, *biagp, *ciagp, *diagp; | 866 | struct iag *iagp, *aiagp, *biagp, *ciagp, *diagp; |
867 | struct metapage *mp, *amp, *bmp, *cmp, *dmp; | 867 | struct metapage *mp, *amp, *bmp, *cmp, *dmp; |
868 | int iagno, ino, extno, bitno, sword, agno; | 868 | int iagno, ino, extno, bitno, sword, agno; |
869 | int back, fwd; | 869 | int back, fwd; |
870 | u32 bitmap, mask; | 870 | u32 bitmap, mask; |
871 | struct inode *ipimap = JFS_SBI(ip->i_sb)->ipimap; | 871 | struct inode *ipimap = JFS_SBI(ip->i_sb)->ipimap; |
872 | struct inomap *imap = JFS_IP(ipimap)->i_imap; | 872 | struct inomap *imap = JFS_IP(ipimap)->i_imap; |
873 | pxd_t freepxd; | 873 | pxd_t freepxd; |
874 | tid_t tid; | 874 | tid_t tid; |
875 | struct inode *iplist[3]; | 875 | struct inode *iplist[3]; |
876 | struct tlock *tlck; | 876 | struct tlock *tlck; |
877 | struct pxd_lock *pxdlock; | 877 | struct pxd_lock *pxdlock; |
878 | 878 | ||
879 | /* | 879 | /* |
880 | * This is just to suppress compiler warnings. The same logic that | 880 | * This is just to suppress compiler warnings. The same logic that |
881 | * references these variables is used to initialize them. | 881 | * references these variables is used to initialize them. |
882 | */ | 882 | */ |
883 | aiagp = biagp = ciagp = diagp = NULL; | 883 | aiagp = biagp = ciagp = diagp = NULL; |
884 | 884 | ||
885 | /* get the iag number containing the inode. | 885 | /* get the iag number containing the inode. |
886 | */ | 886 | */ |
887 | iagno = INOTOIAG(inum); | 887 | iagno = INOTOIAG(inum); |
888 | 888 | ||
889 | /* make sure that the iag is contained within | 889 | /* make sure that the iag is contained within |
890 | * the map. | 890 | * the map. |
891 | */ | 891 | */ |
892 | if (iagno >= imap->im_nextiag) { | 892 | if (iagno >= imap->im_nextiag) { |
893 | print_hex_dump(KERN_ERR, "imap: ", DUMP_PREFIX_ADDRESS, 16, 4, | 893 | print_hex_dump(KERN_ERR, "imap: ", DUMP_PREFIX_ADDRESS, 16, 4, |
894 | imap, 32, 0); | 894 | imap, 32, 0); |
895 | jfs_error(ip->i_sb, | 895 | jfs_error(ip->i_sb, |
896 | "diFree: inum = %d, iagno = %d, nextiag = %d", | 896 | "diFree: inum = %d, iagno = %d, nextiag = %d", |
897 | (uint) inum, iagno, imap->im_nextiag); | 897 | (uint) inum, iagno, imap->im_nextiag); |
898 | return -EIO; | 898 | return -EIO; |
899 | } | 899 | } |
900 | 900 | ||
901 | /* get the allocation group for this ino. | 901 | /* get the allocation group for this ino. |
902 | */ | 902 | */ |
903 | agno = JFS_IP(ip)->agno; | 903 | agno = JFS_IP(ip)->agno; |
904 | 904 | ||
905 | /* Lock the AG specific inode map information | 905 | /* Lock the AG specific inode map information |
906 | */ | 906 | */ |
907 | AG_LOCK(imap, agno); | 907 | AG_LOCK(imap, agno); |
908 | 908 | ||
909 | /* Obtain read lock in imap inode. Don't release it until we have | 909 | /* Obtain read lock in imap inode. Don't release it until we have |
910 | * read all of the IAG's that we are going to. | 910 | * read all of the IAG's that we are going to. |
911 | */ | 911 | */ |
912 | IREAD_LOCK(ipimap, RDWRLOCK_IMAP); | 912 | IREAD_LOCK(ipimap, RDWRLOCK_IMAP); |
913 | 913 | ||
914 | /* read the iag. | 914 | /* read the iag. |
915 | */ | 915 | */ |
916 | if ((rc = diIAGRead(imap, iagno, &mp))) { | 916 | if ((rc = diIAGRead(imap, iagno, &mp))) { |
917 | IREAD_UNLOCK(ipimap); | 917 | IREAD_UNLOCK(ipimap); |
918 | AG_UNLOCK(imap, agno); | 918 | AG_UNLOCK(imap, agno); |
919 | return (rc); | 919 | return (rc); |
920 | } | 920 | } |
921 | iagp = (struct iag *) mp->data; | 921 | iagp = (struct iag *) mp->data; |
922 | 922 | ||
923 | /* get the inode number and extent number of the inode within | 923 | /* get the inode number and extent number of the inode within |
924 | * the iag and the inode number within the extent. | 924 | * the iag and the inode number within the extent. |
925 | */ | 925 | */ |
926 | ino = inum & (INOSPERIAG - 1); | 926 | ino = inum & (INOSPERIAG - 1); |
927 | extno = ino >> L2INOSPEREXT; | 927 | extno = ino >> L2INOSPEREXT; |
928 | bitno = ino & (INOSPEREXT - 1); | 928 | bitno = ino & (INOSPEREXT - 1); |
929 | mask = HIGHORDER >> bitno; | 929 | mask = HIGHORDER >> bitno; |
930 | 930 | ||
931 | if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { | 931 | if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { |
932 | jfs_error(ip->i_sb, | 932 | jfs_error(ip->i_sb, |
933 | "diFree: wmap shows inode already free"); | 933 | "diFree: wmap shows inode already free"); |
934 | } | 934 | } |
935 | 935 | ||
936 | if (!addressPXD(&iagp->inoext[extno])) { | 936 | if (!addressPXD(&iagp->inoext[extno])) { |
937 | release_metapage(mp); | 937 | release_metapage(mp); |
938 | IREAD_UNLOCK(ipimap); | 938 | IREAD_UNLOCK(ipimap); |
939 | AG_UNLOCK(imap, agno); | 939 | AG_UNLOCK(imap, agno); |
940 | jfs_error(ip->i_sb, "diFree: invalid inoext"); | 940 | jfs_error(ip->i_sb, "diFree: invalid inoext"); |
941 | return -EIO; | 941 | return -EIO; |
942 | } | 942 | } |
943 | 943 | ||
944 | /* compute the bitmap for the extent reflecting the freed inode. | 944 | /* compute the bitmap for the extent reflecting the freed inode. |
945 | */ | 945 | */ |
946 | bitmap = le32_to_cpu(iagp->wmap[extno]) & ~mask; | 946 | bitmap = le32_to_cpu(iagp->wmap[extno]) & ~mask; |
947 | 947 | ||
948 | if (imap->im_agctl[agno].numfree > imap->im_agctl[agno].numinos) { | 948 | if (imap->im_agctl[agno].numfree > imap->im_agctl[agno].numinos) { |
949 | release_metapage(mp); | 949 | release_metapage(mp); |
950 | IREAD_UNLOCK(ipimap); | 950 | IREAD_UNLOCK(ipimap); |
951 | AG_UNLOCK(imap, agno); | 951 | AG_UNLOCK(imap, agno); |
952 | jfs_error(ip->i_sb, "diFree: numfree > numinos"); | 952 | jfs_error(ip->i_sb, "diFree: numfree > numinos"); |
953 | return -EIO; | 953 | return -EIO; |
954 | } | 954 | } |
955 | /* | 955 | /* |
956 | * inode extent still has some inodes or below low water mark: | 956 | * inode extent still has some inodes or below low water mark: |
957 | * keep the inode extent; | 957 | * keep the inode extent; |
958 | */ | 958 | */ |
959 | if (bitmap || | 959 | if (bitmap || |
960 | imap->im_agctl[agno].numfree < 96 || | 960 | imap->im_agctl[agno].numfree < 96 || |
961 | (imap->im_agctl[agno].numfree < 288 && | 961 | (imap->im_agctl[agno].numfree < 288 && |
962 | (((imap->im_agctl[agno].numfree * 100) / | 962 | (((imap->im_agctl[agno].numfree * 100) / |
963 | imap->im_agctl[agno].numinos) <= 25))) { | 963 | imap->im_agctl[agno].numinos) <= 25))) { |
964 | /* if the iag currently has no free inodes (i.e., | 964 | /* if the iag currently has no free inodes (i.e., |
965 | * the inode being freed is the first free inode of iag), | 965 | * the inode being freed is the first free inode of iag), |
966 | * insert the iag at head of the inode free list for the ag. | 966 | * insert the iag at head of the inode free list for the ag. |
967 | */ | 967 | */ |
968 | if (iagp->nfreeinos == 0) { | 968 | if (iagp->nfreeinos == 0) { |
969 | /* check if there are any iags on the ag inode | 969 | /* check if there are any iags on the ag inode |
970 | * free list. if so, read the first one so that | 970 | * free list. if so, read the first one so that |
971 | * we can link the current iag onto the list at | 971 | * we can link the current iag onto the list at |
972 | * the head. | 972 | * the head. |
973 | */ | 973 | */ |
974 | if ((fwd = imap->im_agctl[agno].inofree) >= 0) { | 974 | if ((fwd = imap->im_agctl[agno].inofree) >= 0) { |
975 | /* read the iag that currently is the head | 975 | /* read the iag that currently is the head |
976 | * of the list. | 976 | * of the list. |
977 | */ | 977 | */ |
978 | if ((rc = diIAGRead(imap, fwd, &amp))) { | 978 | if ((rc = diIAGRead(imap, fwd, &amp))) { |
979 | IREAD_UNLOCK(ipimap); | 979 | IREAD_UNLOCK(ipimap); |
980 | AG_UNLOCK(imap, agno); | 980 | AG_UNLOCK(imap, agno); |
981 | release_metapage(mp); | 981 | release_metapage(mp); |
982 | return (rc); | 982 | return (rc); |
983 | } | 983 | } |
984 | aiagp = (struct iag *) amp->data; | 984 | aiagp = (struct iag *) amp->data; |
985 | 985 | ||
986 | /* make current head point back to the iag. | 986 | /* make current head point back to the iag. |
987 | */ | 987 | */ |
988 | aiagp->inofreeback = cpu_to_le32(iagno); | 988 | aiagp->inofreeback = cpu_to_le32(iagno); |
989 | 989 | ||
990 | write_metapage(amp); | 990 | write_metapage(amp); |
991 | } | 991 | } |
992 | 992 | ||
993 | /* iag points forward to current head and iag | 993 | /* iag points forward to current head and iag |
994 | * becomes the new head of the list. | 994 | * becomes the new head of the list. |
995 | */ | 995 | */ |
996 | iagp->inofreefwd = | 996 | iagp->inofreefwd = |
997 | cpu_to_le32(imap->im_agctl[agno].inofree); | 997 | cpu_to_le32(imap->im_agctl[agno].inofree); |
998 | iagp->inofreeback = cpu_to_le32(-1); | 998 | iagp->inofreeback = cpu_to_le32(-1); |
999 | imap->im_agctl[agno].inofree = iagno; | 999 | imap->im_agctl[agno].inofree = iagno; |
1000 | } | 1000 | } |
1001 | IREAD_UNLOCK(ipimap); | 1001 | IREAD_UNLOCK(ipimap); |
1002 | 1002 | ||
1003 | /* update the free inode summary map for the extent if | 1003 | /* update the free inode summary map for the extent if |
1004 | * freeing the inode means the extent will now have free | 1004 | * freeing the inode means the extent will now have free |
1005 | * inodes (i.e., the inode being freed is the first free | 1005 | * inodes (i.e., the inode being freed is the first free |
1006 | * inode of extent), | 1006 | * inode of extent), |
1007 | */ | 1007 | */ |
1008 | if (iagp->wmap[extno] == cpu_to_le32(ONES)) { | 1008 | if (iagp->wmap[extno] == cpu_to_le32(ONES)) { |
1009 | sword = extno >> L2EXTSPERSUM; | 1009 | sword = extno >> L2EXTSPERSUM; |
1010 | bitno = extno & (EXTSPERSUM - 1); | 1010 | bitno = extno & (EXTSPERSUM - 1); |
1011 | iagp->inosmap[sword] &= | 1011 | iagp->inosmap[sword] &= |
1012 | cpu_to_le32(~(HIGHORDER >> bitno)); | 1012 | cpu_to_le32(~(HIGHORDER >> bitno)); |
1013 | } | 1013 | } |
1014 | 1014 | ||
1015 | /* update the bitmap. | 1015 | /* update the bitmap. |
1016 | */ | 1016 | */ |
1017 | iagp->wmap[extno] = cpu_to_le32(bitmap); | 1017 | iagp->wmap[extno] = cpu_to_le32(bitmap); |
1018 | 1018 | ||
1019 | /* update the free inode counts at the iag, ag and | 1019 | /* update the free inode counts at the iag, ag and |
1020 | * map level. | 1020 | * map level. |
1021 | */ | 1021 | */ |
1022 | iagp->nfreeinos = | 1022 | le32_add_cpu(&iagp->nfreeinos, 1); |
1023 | cpu_to_le32(le32_to_cpu(iagp->nfreeinos) + 1); | ||
1024 | imap->im_agctl[agno].numfree += 1; | 1023 | imap->im_agctl[agno].numfree += 1; |
1025 | atomic_inc(&imap->im_numfree); | 1024 | atomic_inc(&imap->im_numfree); |
1026 | 1025 | ||
1027 | /* release the AG inode map lock | 1026 | /* release the AG inode map lock |
1028 | */ | 1027 | */ |
1029 | AG_UNLOCK(imap, agno); | 1028 | AG_UNLOCK(imap, agno); |
1030 | 1029 | ||
1031 | /* write the iag */ | 1030 | /* write the iag */ |
1032 | write_metapage(mp); | 1031 | write_metapage(mp); |
1033 | 1032 | ||
1034 | return (0); | 1033 | return (0); |
1035 | } | 1034 | } |
1036 | 1035 | ||
1037 | 1036 | ||
1038 | /* | 1037 | /* |
1039 | * inode extent has become free and above low water mark: | 1038 | * inode extent has become free and above low water mark: |
1040 | * free the inode extent; | 1039 | * free the inode extent; |
1041 | */ | 1040 | */ |
1042 | 1041 | ||
1043 | /* | 1042 | /* |
1044 | * prepare to update iag list(s) (careful update step 1) | 1043 | * prepare to update iag list(s) (careful update step 1) |
1045 | */ | 1044 | */ |
1046 | amp = bmp = cmp = dmp = NULL; | 1045 | amp = bmp = cmp = dmp = NULL; |
1047 | fwd = back = -1; | 1046 | fwd = back = -1; |
1048 | 1047 | ||
1049 | /* check if the iag currently has no free extents. if so, | 1048 | /* check if the iag currently has no free extents. if so, |
1050 | * it will be placed on the head of the ag extent free list. | 1049 | * it will be placed on the head of the ag extent free list. |
1051 | */ | 1050 | */ |
1052 | if (iagp->nfreeexts == 0) { | 1051 | if (iagp->nfreeexts == 0) { |
1053 | /* check if the ag extent free list has any iags. | 1052 | /* check if the ag extent free list has any iags. |
1054 | * if so, read the iag at the head of the list now. | 1053 | * if so, read the iag at the head of the list now. |
1055 | * this (head) iag will be updated later to reflect | 1054 | * this (head) iag will be updated later to reflect |
1056 | * the addition of the current iag at the head of | 1055 | * the addition of the current iag at the head of |
1057 | * the list. | 1056 | * the list. |
1058 | */ | 1057 | */ |
1059 | if ((fwd = imap->im_agctl[agno].extfree) >= 0) { | 1058 | if ((fwd = imap->im_agctl[agno].extfree) >= 0) { |
1060 | if ((rc = diIAGRead(imap, fwd, &amp))) | 1059 | if ((rc = diIAGRead(imap, fwd, &amp))) |
1061 | goto error_out; | 1060 | goto error_out; |
1062 | aiagp = (struct iag *) amp->data; | 1061 | aiagp = (struct iag *) amp->data; |
1063 | } | 1062 | } |
1064 | } else { | 1063 | } else { |
1065 | /* iag has free extents. check if the addition of a free | 1064 | /* iag has free extents. check if the addition of a free |
1066 | * extent will cause all extents to be free within this | 1065 | * extent will cause all extents to be free within this |
1067 | * iag. if so, the iag will be removed from the ag extent | 1066 | * iag. if so, the iag will be removed from the ag extent |
1068 | * free list and placed on the inode map's free iag list. | 1067 | * free list and placed on the inode map's free iag list. |
1069 | */ | 1068 | */ |
1070 | if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) { | 1069 | if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) { |
1071 | /* in preparation for removing the iag from the | 1070 | /* in preparation for removing the iag from the |
1072 | * ag extent free list, read the iags preceeding | 1071 | * ag extent free list, read the iags preceeding |
1073 | * and following the iag on the ag extent free | 1072 | * and following the iag on the ag extent free |
1074 | * list. | 1073 | * list. |
1075 | */ | 1074 | */ |
1076 | if ((fwd = le32_to_cpu(iagp->extfreefwd)) >= 0) { | 1075 | if ((fwd = le32_to_cpu(iagp->extfreefwd)) >= 0) { |
1077 | if ((rc = diIAGRead(imap, fwd, &amp))) | 1076 | if ((rc = diIAGRead(imap, fwd, &amp))) |
1078 | goto error_out; | 1077 | goto error_out; |
1079 | aiagp = (struct iag *) amp->data; | 1078 | aiagp = (struct iag *) amp->data; |
1080 | } | 1079 | } |
1081 | 1080 | ||
1082 | if ((back = le32_to_cpu(iagp->extfreeback)) >= 0) { | 1081 | if ((back = le32_to_cpu(iagp->extfreeback)) >= 0) { |
1083 | if ((rc = diIAGRead(imap, back, &bmp))) | 1082 | if ((rc = diIAGRead(imap, back, &bmp))) |
1084 | goto error_out; | 1083 | goto error_out; |
1085 | biagp = (struct iag *) bmp->data; | 1084 | biagp = (struct iag *) bmp->data; |
1086 | } | 1085 | } |
1087 | } | 1086 | } |
1088 | } | 1087 | } |
1089 | 1088 | ||
1090 | /* remove the iag from the ag inode free list if freeing | 1089 | /* remove the iag from the ag inode free list if freeing |
1091 | * this extent cause the iag to have no free inodes. | 1090 | * this extent cause the iag to have no free inodes. |
1092 | */ | 1091 | */ |
1093 | if (iagp->nfreeinos == cpu_to_le32(INOSPEREXT - 1)) { | 1092 | if (iagp->nfreeinos == cpu_to_le32(INOSPEREXT - 1)) { |
1094 | int inofreeback = le32_to_cpu(iagp->inofreeback); | 1093 | int inofreeback = le32_to_cpu(iagp->inofreeback); |
1095 | int inofreefwd = le32_to_cpu(iagp->inofreefwd); | 1094 | int inofreefwd = le32_to_cpu(iagp->inofreefwd); |
1096 | 1095 | ||
1097 | /* in preparation for removing the iag from the | 1096 | /* in preparation for removing the iag from the |
1098 | * ag inode free list, read the iags preceeding | 1097 | * ag inode free list, read the iags preceeding |
1099 | * and following the iag on the ag inode free | 1098 | * and following the iag on the ag inode free |
1100 | * list. before reading these iags, we must make | 1099 | * list. before reading these iags, we must make |
1101 | * sure that we already don't have them in hand | 1100 | * sure that we already don't have them in hand |
1102 | * from up above, since re-reading an iag (buffer) | 1101 | * from up above, since re-reading an iag (buffer) |
1103 | * we are currently holding would cause a deadlock. | 1102 | * we are currently holding would cause a deadlock. |
1104 | */ | 1103 | */ |
1105 | if (inofreefwd >= 0) { | 1104 | if (inofreefwd >= 0) { |
1106 | 1105 | ||
1107 | if (inofreefwd == fwd) | 1106 | if (inofreefwd == fwd) |
1108 | ciagp = (struct iag *) amp->data; | 1107 | ciagp = (struct iag *) amp->data; |
1109 | else if (inofreefwd == back) | 1108 | else if (inofreefwd == back) |
1110 | ciagp = (struct iag *) bmp->data; | 1109 | ciagp = (struct iag *) bmp->data; |
1111 | else { | 1110 | else { |
1112 | if ((rc = | 1111 | if ((rc = |
1113 | diIAGRead(imap, inofreefwd, &cmp))) | 1112 | diIAGRead(imap, inofreefwd, &cmp))) |
1114 | goto error_out; | 1113 | goto error_out; |
1115 | ciagp = (struct iag *) cmp->data; | 1114 | ciagp = (struct iag *) cmp->data; |
1116 | } | 1115 | } |
1117 | assert(ciagp != NULL); | 1116 | assert(ciagp != NULL); |
1118 | } | 1117 | } |
1119 | 1118 | ||
1120 | if (inofreeback >= 0) { | 1119 | if (inofreeback >= 0) { |
1121 | if (inofreeback == fwd) | 1120 | if (inofreeback == fwd) |
1122 | diagp = (struct iag *) amp->data; | 1121 | diagp = (struct iag *) amp->data; |
1123 | else if (inofreeback == back) | 1122 | else if (inofreeback == back) |
1124 | diagp = (struct iag *) bmp->data; | 1123 | diagp = (struct iag *) bmp->data; |
1125 | else { | 1124 | else { |
1126 | if ((rc = | 1125 | if ((rc = |
1127 | diIAGRead(imap, inofreeback, &dmp))) | 1126 | diIAGRead(imap, inofreeback, &dmp))) |
1128 | goto error_out; | 1127 | goto error_out; |
1129 | diagp = (struct iag *) dmp->data; | 1128 | diagp = (struct iag *) dmp->data; |
1130 | } | 1129 | } |
1131 | assert(diagp != NULL); | 1130 | assert(diagp != NULL); |
1132 | } | 1131 | } |
1133 | } | 1132 | } |
1134 | 1133 | ||
1135 | IREAD_UNLOCK(ipimap); | 1134 | IREAD_UNLOCK(ipimap); |
1136 | 1135 | ||
1137 | /* | 1136 | /* |
1138 | * invalidate any page of the inode extent freed from buffer cache; | 1137 | * invalidate any page of the inode extent freed from buffer cache; |
1139 | */ | 1138 | */ |
1140 | freepxd = iagp->inoext[extno]; | 1139 | freepxd = iagp->inoext[extno]; |
1141 | invalidate_pxd_metapages(ip, freepxd); | 1140 | invalidate_pxd_metapages(ip, freepxd); |
1142 | 1141 | ||
1143 | /* | 1142 | /* |
1144 | * update iag list(s) (careful update step 2) | 1143 | * update iag list(s) (careful update step 2) |
1145 | */ | 1144 | */ |
1146 | /* add the iag to the ag extent free list if this is the | 1145 | /* add the iag to the ag extent free list if this is the |
1147 | * first free extent for the iag. | 1146 | * first free extent for the iag. |
1148 | */ | 1147 | */ |
1149 | if (iagp->nfreeexts == 0) { | 1148 | if (iagp->nfreeexts == 0) { |
1150 | if (fwd >= 0) | 1149 | if (fwd >= 0) |
1151 | aiagp->extfreeback = cpu_to_le32(iagno); | 1150 | aiagp->extfreeback = cpu_to_le32(iagno); |
1152 | 1151 | ||
1153 | iagp->extfreefwd = | 1152 | iagp->extfreefwd = |
1154 | cpu_to_le32(imap->im_agctl[agno].extfree); | 1153 | cpu_to_le32(imap->im_agctl[agno].extfree); |
1155 | iagp->extfreeback = cpu_to_le32(-1); | 1154 | iagp->extfreeback = cpu_to_le32(-1); |
1156 | imap->im_agctl[agno].extfree = iagno; | 1155 | imap->im_agctl[agno].extfree = iagno; |
1157 | } else { | 1156 | } else { |
1158 | /* remove the iag from the ag extent list if all extents | 1157 | /* remove the iag from the ag extent list if all extents |
1159 | * are now free and place it on the inode map iag free list. | 1158 | * are now free and place it on the inode map iag free list. |
1160 | */ | 1159 | */ |
1161 | if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) { | 1160 | if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) { |
1162 | if (fwd >= 0) | 1161 | if (fwd >= 0) |
1163 | aiagp->extfreeback = iagp->extfreeback; | 1162 | aiagp->extfreeback = iagp->extfreeback; |
1164 | 1163 | ||
1165 | if (back >= 0) | 1164 | if (back >= 0) |
1166 | biagp->extfreefwd = iagp->extfreefwd; | 1165 | biagp->extfreefwd = iagp->extfreefwd; |
1167 | else | 1166 | else |
1168 | imap->im_agctl[agno].extfree = | 1167 | imap->im_agctl[agno].extfree = |
1169 | le32_to_cpu(iagp->extfreefwd); | 1168 | le32_to_cpu(iagp->extfreefwd); |
1170 | 1169 | ||
1171 | iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1); | 1170 | iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1); |
1172 | 1171 | ||
1173 | IAGFREE_LOCK(imap); | 1172 | IAGFREE_LOCK(imap); |
1174 | iagp->iagfree = cpu_to_le32(imap->im_freeiag); | 1173 | iagp->iagfree = cpu_to_le32(imap->im_freeiag); |
1175 | imap->im_freeiag = iagno; | 1174 | imap->im_freeiag = iagno; |
1176 | IAGFREE_UNLOCK(imap); | 1175 | IAGFREE_UNLOCK(imap); |
1177 | } | 1176 | } |
1178 | } | 1177 | } |
1179 | 1178 | ||
1180 | /* remove the iag from the ag inode free list if freeing | 1179 | /* remove the iag from the ag inode free list if freeing |
1181 | * this extent causes the iag to have no free inodes. | 1180 | * this extent causes the iag to have no free inodes. |
1182 | */ | 1181 | */ |
1183 | if (iagp->nfreeinos == cpu_to_le32(INOSPEREXT - 1)) { | 1182 | if (iagp->nfreeinos == cpu_to_le32(INOSPEREXT - 1)) { |
1184 | if ((int) le32_to_cpu(iagp->inofreefwd) >= 0) | 1183 | if ((int) le32_to_cpu(iagp->inofreefwd) >= 0) |
1185 | ciagp->inofreeback = iagp->inofreeback; | 1184 | ciagp->inofreeback = iagp->inofreeback; |
1186 | 1185 | ||
1187 | if ((int) le32_to_cpu(iagp->inofreeback) >= 0) | 1186 | if ((int) le32_to_cpu(iagp->inofreeback) >= 0) |
1188 | diagp->inofreefwd = iagp->inofreefwd; | 1187 | diagp->inofreefwd = iagp->inofreefwd; |
1189 | else | 1188 | else |
1190 | imap->im_agctl[agno].inofree = | 1189 | imap->im_agctl[agno].inofree = |
1191 | le32_to_cpu(iagp->inofreefwd); | 1190 | le32_to_cpu(iagp->inofreefwd); |
1192 | 1191 | ||
1193 | iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1); | 1192 | iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1); |
1194 | } | 1193 | } |
1195 | 1194 | ||
1196 | /* update the inode extent address and working map | 1195 | /* update the inode extent address and working map |
1197 | * to reflect the free extent. | 1196 | * to reflect the free extent. |
1198 | * the permanent map should have been updated already | 1197 | * the permanent map should have been updated already |
1199 | * for the inode being freed. | 1198 | * for the inode being freed. |
1200 | */ | 1199 | */ |
1201 | if (iagp->pmap[extno] != 0) { | 1200 | if (iagp->pmap[extno] != 0) { |
1202 | jfs_error(ip->i_sb, "diFree: the pmap does not show inode free"); | 1201 | jfs_error(ip->i_sb, "diFree: the pmap does not show inode free"); |
1203 | } | 1202 | } |
1204 | iagp->wmap[extno] = 0; | 1203 | iagp->wmap[extno] = 0; |
1205 | PXDlength(&iagp->inoext[extno], 0); | 1204 | PXDlength(&iagp->inoext[extno], 0); |
1206 | PXDaddress(&iagp->inoext[extno], 0); | 1205 | PXDaddress(&iagp->inoext[extno], 0); |
1207 | 1206 | ||
1208 | /* update the free extent and free inode summary maps | 1207 | /* update the free extent and free inode summary maps |
1209 | * to reflect the freed extent. | 1208 | * to reflect the freed extent. |
1210 | * the inode summary map is marked to indicate no inodes | 1209 | * the inode summary map is marked to indicate no inodes |
1211 | * available for the freed extent. | 1210 | * available for the freed extent. |
1212 | */ | 1211 | */ |
1213 | sword = extno >> L2EXTSPERSUM; | 1212 | sword = extno >> L2EXTSPERSUM; |
1214 | bitno = extno & (EXTSPERSUM - 1); | 1213 | bitno = extno & (EXTSPERSUM - 1); |
1215 | mask = HIGHORDER >> bitno; | 1214 | mask = HIGHORDER >> bitno; |
1216 | iagp->inosmap[sword] |= cpu_to_le32(mask); | 1215 | iagp->inosmap[sword] |= cpu_to_le32(mask); |
1217 | iagp->extsmap[sword] &= cpu_to_le32(~mask); | 1216 | iagp->extsmap[sword] &= cpu_to_le32(~mask); |
1218 | 1217 | ||
1219 | /* update the number of free inodes and number of free extents | 1218 | /* update the number of free inodes and number of free extents |
1220 | * for the iag. | 1219 | * for the iag. |
1221 | */ | 1220 | */ |
1222 | iagp->nfreeinos = cpu_to_le32(le32_to_cpu(iagp->nfreeinos) - | 1221 | le32_add_cpu(&iagp->nfreeinos, -(INOSPEREXT - 1)); |
1223 | (INOSPEREXT - 1)); | 1222 | le32_add_cpu(&iagp->nfreeexts, 1); |
1224 | iagp->nfreeexts = cpu_to_le32(le32_to_cpu(iagp->nfreeexts) + 1); | ||
1225 | 1223 | ||
1226 | /* update the number of free inodes and backed inodes | 1224 | /* update the number of free inodes and backed inodes |
1227 | * at the ag and inode map level. | 1225 | * at the ag and inode map level. |
1228 | */ | 1226 | */ |
1229 | imap->im_agctl[agno].numfree -= (INOSPEREXT - 1); | 1227 | imap->im_agctl[agno].numfree -= (INOSPEREXT - 1); |
1230 | imap->im_agctl[agno].numinos -= INOSPEREXT; | 1228 | imap->im_agctl[agno].numinos -= INOSPEREXT; |
1231 | atomic_sub(INOSPEREXT - 1, &imap->im_numfree); | 1229 | atomic_sub(INOSPEREXT - 1, &imap->im_numfree); |
1232 | atomic_sub(INOSPEREXT, &imap->im_numinos); | 1230 | atomic_sub(INOSPEREXT, &imap->im_numinos); |
1233 | 1231 | ||
1234 | if (amp) | 1232 | if (amp) |
1235 | write_metapage(amp); | 1233 | write_metapage(amp); |
1236 | if (bmp) | 1234 | if (bmp) |
1237 | write_metapage(bmp); | 1235 | write_metapage(bmp); |
1238 | if (cmp) | 1236 | if (cmp) |
1239 | write_metapage(cmp); | 1237 | write_metapage(cmp); |
1240 | if (dmp) | 1238 | if (dmp) |
1241 | write_metapage(dmp); | 1239 | write_metapage(dmp); |
1242 | 1240 | ||
1243 | /* | 1241 | /* |
1244 | * start transaction to update block allocation map | 1242 | * start transaction to update block allocation map |
1245 | * for the inode extent freed; | 1243 | * for the inode extent freed; |
1246 | * | 1244 | * |
1247 | * N.B. AG_LOCK is released and iag will be released below, and | 1245 | * N.B. AG_LOCK is released and iag will be released below, and |
1248 | * other thread may allocate inode from/reusing the ixad freed | 1246 | * other thread may allocate inode from/reusing the ixad freed |
1249 | * BUT with new/different backing inode extent from the extent | 1247 | * BUT with new/different backing inode extent from the extent |
1250 | * to be freed by the transaction; | 1248 | * to be freed by the transaction; |
1251 | */ | 1249 | */ |
1252 | tid = txBegin(ipimap->i_sb, COMMIT_FORCE); | 1250 | tid = txBegin(ipimap->i_sb, COMMIT_FORCE); |
1253 | mutex_lock(&JFS_IP(ipimap)->commit_mutex); | 1251 | mutex_lock(&JFS_IP(ipimap)->commit_mutex); |
1254 | 1252 | ||
1255 | /* acquire tlock of the iag page of the freed ixad | 1253 | /* acquire tlock of the iag page of the freed ixad |
1256 | * to force the page NOHOMEOK (even though no data is | 1254 | * to force the page NOHOMEOK (even though no data is |
1257 | * logged from the iag page) until NOREDOPAGE|FREEXTENT log | 1255 | * logged from the iag page) until NOREDOPAGE|FREEXTENT log |
1258 | * for the free of the extent is committed; | 1256 | * for the free of the extent is committed; |
1259 | * write FREEXTENT|NOREDOPAGE log record | 1257 | * write FREEXTENT|NOREDOPAGE log record |
1260 | * N.B. linelock is overlaid as freed extent descriptor; | 1258 | * N.B. linelock is overlaid as freed extent descriptor; |
1261 | */ | 1259 | */ |
1262 | tlck = txLock(tid, ipimap, mp, tlckINODE | tlckFREE); | 1260 | tlck = txLock(tid, ipimap, mp, tlckINODE | tlckFREE); |
1263 | pxdlock = (struct pxd_lock *) & tlck->lock; | 1261 | pxdlock = (struct pxd_lock *) & tlck->lock; |
1264 | pxdlock->flag = mlckFREEPXD; | 1262 | pxdlock->flag = mlckFREEPXD; |
1265 | pxdlock->pxd = freepxd; | 1263 | pxdlock->pxd = freepxd; |
1266 | pxdlock->index = 1; | 1264 | pxdlock->index = 1; |
1267 | 1265 | ||
1268 | write_metapage(mp); | 1266 | write_metapage(mp); |
1269 | 1267 | ||
1270 | iplist[0] = ipimap; | 1268 | iplist[0] = ipimap; |
1271 | 1269 | ||
1272 | /* | 1270 | /* |
1273 | * logredo needs the IAG number and IAG extent index in order | 1271 | * logredo needs the IAG number and IAG extent index in order |
1274 | * to ensure that the IMap is consistent. The least disruptive | 1272 | * to ensure that the IMap is consistent. The least disruptive |
1275 | * way to pass these values through to the transaction manager | 1273 | * way to pass these values through to the transaction manager |
1276 | * is in the iplist array. | 1274 | * is in the iplist array. |
1277 | * | 1275 | * |
1278 | * It's not pretty, but it works. | 1276 | * It's not pretty, but it works. |
1279 | */ | 1277 | */ |
1280 | iplist[1] = (struct inode *) (size_t)iagno; | 1278 | iplist[1] = (struct inode *) (size_t)iagno; |
1281 | iplist[2] = (struct inode *) (size_t)extno; | 1279 | iplist[2] = (struct inode *) (size_t)extno; |
1282 | 1280 | ||
1283 | rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE); | 1281 | rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE); |
1284 | 1282 | ||
1285 | txEnd(tid); | 1283 | txEnd(tid); |
1286 | mutex_unlock(&JFS_IP(ipimap)->commit_mutex); | 1284 | mutex_unlock(&JFS_IP(ipimap)->commit_mutex); |
1287 | 1285 | ||
1288 | /* unlock the AG inode map information */ | 1286 | /* unlock the AG inode map information */ |
1289 | AG_UNLOCK(imap, agno); | 1287 | AG_UNLOCK(imap, agno); |
1290 | 1288 | ||
1291 | return (0); | 1289 | return (0); |
1292 | 1290 | ||
1293 | error_out: | 1291 | error_out: |
1294 | IREAD_UNLOCK(ipimap); | 1292 | IREAD_UNLOCK(ipimap); |
1295 | 1293 | ||
1296 | if (amp) | 1294 | if (amp) |
1297 | release_metapage(amp); | 1295 | release_metapage(amp); |
1298 | if (bmp) | 1296 | if (bmp) |
1299 | release_metapage(bmp); | 1297 | release_metapage(bmp); |
1300 | if (cmp) | 1298 | if (cmp) |
1301 | release_metapage(cmp); | 1299 | release_metapage(cmp); |
1302 | if (dmp) | 1300 | if (dmp) |
1303 | release_metapage(dmp); | 1301 | release_metapage(dmp); |
1304 | 1302 | ||
1305 | AG_UNLOCK(imap, agno); | 1303 | AG_UNLOCK(imap, agno); |
1306 | 1304 | ||
1307 | release_metapage(mp); | 1305 | release_metapage(mp); |
1308 | 1306 | ||
1309 | return (rc); | 1307 | return (rc); |
1310 | } | 1308 | } |
1311 | 1309 | ||
/*
 * There are several places in the diAlloc* routines where we initialize
 * the inode.
 *
 * Fills in the incore inode number and the jfs-private fields recording
 * which inode extent (ixpxd) and allocation group (agno) the newly
 * allocated disk inode belongs to.  No error can occur here.
 */
static inline void
diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp)
{
	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
	struct jfs_inode_info *jfs_ip = JFS_IP(ip);

	/* global inode number = (iag number << log2(inodes per iag)) + offset */
	ip->i_ino = (iagno << L2INOSPERIAG) + ino;
	/* remember the descriptor of the inode extent backing this inode */
	jfs_ip->ixpxd = iagp->inoext[extno];
	/* ag is derived from the iag's starting block (agstart is on-disk LE) */
	jfs_ip->agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi);
	/* -1 presumably means "no active ag yet" — confirm against active_ag users */
	jfs_ip->active_ag = -1;
}
1327 | 1325 | ||
1328 | 1326 | ||
1329 | /* | 1327 | /* |
1330 | * NAME: diAlloc(pip,dir,ip) | 1328 | * NAME: diAlloc(pip,dir,ip) |
1331 | * | 1329 | * |
1332 | * FUNCTION: allocate a disk inode from the inode working map | 1330 | * FUNCTION: allocate a disk inode from the inode working map |
1333 | * for a fileset or aggregate. | 1331 | * for a fileset or aggregate. |
1334 | * | 1332 | * |
1335 | * PARAMETERS: | 1333 | * PARAMETERS: |
1336 | * pip - pointer to incore inode for the parent inode. | 1334 | * pip - pointer to incore inode for the parent inode. |
1337 | * dir - 'true' if the new disk inode is for a directory. | 1335 | * dir - 'true' if the new disk inode is for a directory. |
1338 | * ip - pointer to a new inode | 1336 | * ip - pointer to a new inode |
1339 | * | 1337 | * |
1340 | * RETURN VALUES: | 1338 | * RETURN VALUES: |
1341 | * 0 - success. | 1339 | * 0 - success. |
1342 | * -ENOSPC - insufficient disk resources. | 1340 | * -ENOSPC - insufficient disk resources. |
1343 | * -EIO - i/o error. | 1341 | * -EIO - i/o error. |
1344 | */ | 1342 | */ |
1345 | int diAlloc(struct inode *pip, bool dir, struct inode *ip) | 1343 | int diAlloc(struct inode *pip, bool dir, struct inode *ip) |
1346 | { | 1344 | { |
1347 | int rc, ino, iagno, addext, extno, bitno, sword; | 1345 | int rc, ino, iagno, addext, extno, bitno, sword; |
1348 | int nwords, rem, i, agno; | 1346 | int nwords, rem, i, agno; |
1349 | u32 mask, inosmap, extsmap; | 1347 | u32 mask, inosmap, extsmap; |
1350 | struct inode *ipimap; | 1348 | struct inode *ipimap; |
1351 | struct metapage *mp; | 1349 | struct metapage *mp; |
1352 | ino_t inum; | 1350 | ino_t inum; |
1353 | struct iag *iagp; | 1351 | struct iag *iagp; |
1354 | struct inomap *imap; | 1352 | struct inomap *imap; |
1355 | 1353 | ||
1356 | /* get the pointers to the inode map inode and the | 1354 | /* get the pointers to the inode map inode and the |
1357 | * corresponding imap control structure. | 1355 | * corresponding imap control structure. |
1358 | */ | 1356 | */ |
1359 | ipimap = JFS_SBI(pip->i_sb)->ipimap; | 1357 | ipimap = JFS_SBI(pip->i_sb)->ipimap; |
1360 | imap = JFS_IP(ipimap)->i_imap; | 1358 | imap = JFS_IP(ipimap)->i_imap; |
1361 | JFS_IP(ip)->ipimap = ipimap; | 1359 | JFS_IP(ip)->ipimap = ipimap; |
1362 | JFS_IP(ip)->fileset = FILESYSTEM_I; | 1360 | JFS_IP(ip)->fileset = FILESYSTEM_I; |
1363 | 1361 | ||
1364 | /* for a directory, the allocation policy is to start | 1362 | /* for a directory, the allocation policy is to start |
1365 | * at the ag level using the preferred ag. | 1363 | * at the ag level using the preferred ag. |
1366 | */ | 1364 | */ |
1367 | if (dir) { | 1365 | if (dir) { |
1368 | agno = dbNextAG(JFS_SBI(pip->i_sb)->ipbmap); | 1366 | agno = dbNextAG(JFS_SBI(pip->i_sb)->ipbmap); |
1369 | AG_LOCK(imap, agno); | 1367 | AG_LOCK(imap, agno); |
1370 | goto tryag; | 1368 | goto tryag; |
1371 | } | 1369 | } |
1372 | 1370 | ||
1373 | /* for files, the policy starts off by trying to allocate from | 1371 | /* for files, the policy starts off by trying to allocate from |
1374 | * the same iag containing the parent disk inode: | 1372 | * the same iag containing the parent disk inode: |
1375 | * try to allocate the new disk inode close to the parent disk | 1373 | * try to allocate the new disk inode close to the parent disk |
1376 | * inode, using parent disk inode number + 1 as the allocation | 1374 | * inode, using parent disk inode number + 1 as the allocation |
1377 | * hint. (we use a left-to-right policy to attempt to avoid | 1375 | * hint. (we use a left-to-right policy to attempt to avoid |
1378 | * moving backward on the disk.) compute the hint within the | 1376 | * moving backward on the disk.) compute the hint within the |
1379 | * file system and the iag. | 1377 | * file system and the iag. |
1380 | */ | 1378 | */ |
1381 | 1379 | ||
1382 | /* get the ag number of this iag */ | 1380 | /* get the ag number of this iag */ |
1383 | agno = JFS_IP(pip)->agno; | 1381 | agno = JFS_IP(pip)->agno; |
1384 | 1382 | ||
1385 | if (atomic_read(&JFS_SBI(pip->i_sb)->bmap->db_active[agno])) { | 1383 | if (atomic_read(&JFS_SBI(pip->i_sb)->bmap->db_active[agno])) { |
1386 | /* | 1384 | /* |
1387 | * There is an open file actively growing. We want to | 1385 | * There is an open file actively growing. We want to |
1388 | * allocate new inodes from a different ag to avoid | 1386 | * allocate new inodes from a different ag to avoid |
1389 | * fragmentation problems. | 1387 | * fragmentation problems. |
1390 | */ | 1388 | */ |
1391 | agno = dbNextAG(JFS_SBI(pip->i_sb)->ipbmap); | 1389 | agno = dbNextAG(JFS_SBI(pip->i_sb)->ipbmap); |
1392 | AG_LOCK(imap, agno); | 1390 | AG_LOCK(imap, agno); |
1393 | goto tryag; | 1391 | goto tryag; |
1394 | } | 1392 | } |
1395 | 1393 | ||
1396 | inum = pip->i_ino + 1; | 1394 | inum = pip->i_ino + 1; |
1397 | ino = inum & (INOSPERIAG - 1); | 1395 | ino = inum & (INOSPERIAG - 1); |
1398 | 1396 | ||
1399 | /* back off the hint if it is outside of the iag */ | 1397 | /* back off the hint if it is outside of the iag */ |
1400 | if (ino == 0) | 1398 | if (ino == 0) |
1401 | inum = pip->i_ino; | 1399 | inum = pip->i_ino; |
1402 | 1400 | ||
1403 | /* lock the AG inode map information */ | 1401 | /* lock the AG inode map information */ |
1404 | AG_LOCK(imap, agno); | 1402 | AG_LOCK(imap, agno); |
1405 | 1403 | ||
1406 | /* Get read lock on imap inode */ | 1404 | /* Get read lock on imap inode */ |
1407 | IREAD_LOCK(ipimap, RDWRLOCK_IMAP); | 1405 | IREAD_LOCK(ipimap, RDWRLOCK_IMAP); |
1408 | 1406 | ||
1409 | /* get the iag number and read the iag */ | 1407 | /* get the iag number and read the iag */ |
1410 | iagno = INOTOIAG(inum); | 1408 | iagno = INOTOIAG(inum); |
1411 | if ((rc = diIAGRead(imap, iagno, &mp))) { | 1409 | if ((rc = diIAGRead(imap, iagno, &mp))) { |
1412 | IREAD_UNLOCK(ipimap); | 1410 | IREAD_UNLOCK(ipimap); |
1413 | AG_UNLOCK(imap, agno); | 1411 | AG_UNLOCK(imap, agno); |
1414 | return (rc); | 1412 | return (rc); |
1415 | } | 1413 | } |
1416 | iagp = (struct iag *) mp->data; | 1414 | iagp = (struct iag *) mp->data; |
1417 | 1415 | ||
1418 | /* determine if new inode extent is allowed to be added to the iag. | 1416 | /* determine if new inode extent is allowed to be added to the iag. |
1419 | * new inode extent can be added to the iag if the ag | 1417 | * new inode extent can be added to the iag if the ag |
1420 | * has less than 32 free disk inodes and the iag has free extents. | 1418 | * has less than 32 free disk inodes and the iag has free extents. |
1421 | */ | 1419 | */ |
1422 | addext = (imap->im_agctl[agno].numfree < 32 && iagp->nfreeexts); | 1420 | addext = (imap->im_agctl[agno].numfree < 32 && iagp->nfreeexts); |
1423 | 1421 | ||
1424 | /* | 1422 | /* |
1425 | * try to allocate from the IAG | 1423 | * try to allocate from the IAG |
1426 | */ | 1424 | */ |
1427 | /* check if the inode may be allocated from the iag | 1425 | /* check if the inode may be allocated from the iag |
1428 | * (i.e. the inode has free inodes or new extent can be added). | 1426 | * (i.e. the inode has free inodes or new extent can be added). |
1429 | */ | 1427 | */ |
1430 | if (iagp->nfreeinos || addext) { | 1428 | if (iagp->nfreeinos || addext) { |
1431 | /* determine the extent number of the hint. | 1429 | /* determine the extent number of the hint. |
1432 | */ | 1430 | */ |
1433 | extno = ino >> L2INOSPEREXT; | 1431 | extno = ino >> L2INOSPEREXT; |
1434 | 1432 | ||
1435 | /* check if the extent containing the hint has backed | 1433 | /* check if the extent containing the hint has backed |
1436 | * inodes. if so, try to allocate within this extent. | 1434 | * inodes. if so, try to allocate within this extent. |
1437 | */ | 1435 | */ |
1438 | if (addressPXD(&iagp->inoext[extno])) { | 1436 | if (addressPXD(&iagp->inoext[extno])) { |
1439 | bitno = ino & (INOSPEREXT - 1); | 1437 | bitno = ino & (INOSPEREXT - 1); |
1440 | if ((bitno = | 1438 | if ((bitno = |
1441 | diFindFree(le32_to_cpu(iagp->wmap[extno]), | 1439 | diFindFree(le32_to_cpu(iagp->wmap[extno]), |
1442 | bitno)) | 1440 | bitno)) |
1443 | < INOSPEREXT) { | 1441 | < INOSPEREXT) { |
1444 | ino = (extno << L2INOSPEREXT) + bitno; | 1442 | ino = (extno << L2INOSPEREXT) + bitno; |
1445 | 1443 | ||
1446 | /* a free inode (bit) was found within this | 1444 | /* a free inode (bit) was found within this |
1447 | * extent, so allocate it. | 1445 | * extent, so allocate it. |
1448 | */ | 1446 | */ |
1449 | rc = diAllocBit(imap, iagp, ino); | 1447 | rc = diAllocBit(imap, iagp, ino); |
1450 | IREAD_UNLOCK(ipimap); | 1448 | IREAD_UNLOCK(ipimap); |
1451 | if (rc) { | 1449 | if (rc) { |
1452 | assert(rc == -EIO); | 1450 | assert(rc == -EIO); |
1453 | } else { | 1451 | } else { |
1454 | /* set the results of the allocation | 1452 | /* set the results of the allocation |
1455 | * and write the iag. | 1453 | * and write the iag. |
1456 | */ | 1454 | */ |
1457 | diInitInode(ip, iagno, ino, extno, | 1455 | diInitInode(ip, iagno, ino, extno, |
1458 | iagp); | 1456 | iagp); |
1459 | mark_metapage_dirty(mp); | 1457 | mark_metapage_dirty(mp); |
1460 | } | 1458 | } |
1461 | release_metapage(mp); | 1459 | release_metapage(mp); |
1462 | 1460 | ||
1463 | /* free the AG lock and return. | 1461 | /* free the AG lock and return. |
1464 | */ | 1462 | */ |
1465 | AG_UNLOCK(imap, agno); | 1463 | AG_UNLOCK(imap, agno); |
1466 | return (rc); | 1464 | return (rc); |
1467 | } | 1465 | } |
1468 | 1466 | ||
1469 | if (!addext) | 1467 | if (!addext) |
1470 | extno = | 1468 | extno = |
1471 | (extno == | 1469 | (extno == |
1472 | EXTSPERIAG - 1) ? 0 : extno + 1; | 1470 | EXTSPERIAG - 1) ? 0 : extno + 1; |
1473 | } | 1471 | } |
1474 | 1472 | ||
1475 | /* | 1473 | /* |
1476 | * no free inodes within the extent containing the hint. | 1474 | * no free inodes within the extent containing the hint. |
1477 | * | 1475 | * |
1478 | * try to allocate from the backed extents following | 1476 | * try to allocate from the backed extents following |
1479 | * hint or, if appropriate (i.e. addext is true), allocate | 1477 | * hint or, if appropriate (i.e. addext is true), allocate |
1480 | * an extent of free inodes at or following the extent | 1478 | * an extent of free inodes at or following the extent |
1481 | * containing the hint. | 1479 | * containing the hint. |
1482 | * | 1480 | * |
1483 | * the free inode and free extent summary maps are used | 1481 | * the free inode and free extent summary maps are used |
1484 | * here, so determine the starting summary map position | 1482 | * here, so determine the starting summary map position |
1485 | * and the number of words we'll have to examine. again, | 1483 | * and the number of words we'll have to examine. again, |
1486 | * the approach is to allocate following the hint, so we | 1484 | * the approach is to allocate following the hint, so we |
1487 | * might have to initially ignore prior bits of the summary | 1485 | * might have to initially ignore prior bits of the summary |
1488 | * map that represent extents prior to the extent containing | 1486 | * map that represent extents prior to the extent containing |
1489 | * the hint and later revisit these bits. | 1487 | * the hint and later revisit these bits. |
1490 | */ | 1488 | */ |
1491 | bitno = extno & (EXTSPERSUM - 1); | 1489 | bitno = extno & (EXTSPERSUM - 1); |
1492 | nwords = (bitno == 0) ? SMAPSZ : SMAPSZ + 1; | 1490 | nwords = (bitno == 0) ? SMAPSZ : SMAPSZ + 1; |
1493 | sword = extno >> L2EXTSPERSUM; | 1491 | sword = extno >> L2EXTSPERSUM; |
1494 | 1492 | ||
1495 | /* mask any prior bits for the starting words of the | 1493 | /* mask any prior bits for the starting words of the |
1496 | * summary map. | 1494 | * summary map. |
1497 | */ | 1495 | */ |
1498 | mask = ONES << (EXTSPERSUM - bitno); | 1496 | mask = ONES << (EXTSPERSUM - bitno); |
1499 | inosmap = le32_to_cpu(iagp->inosmap[sword]) | mask; | 1497 | inosmap = le32_to_cpu(iagp->inosmap[sword]) | mask; |
1500 | extsmap = le32_to_cpu(iagp->extsmap[sword]) | mask; | 1498 | extsmap = le32_to_cpu(iagp->extsmap[sword]) | mask; |
1501 | 1499 | ||
1502 | /* scan the free inode and free extent summary maps for | 1500 | /* scan the free inode and free extent summary maps for |
1503 | * free resources. | 1501 | * free resources. |
1504 | */ | 1502 | */ |
1505 | for (i = 0; i < nwords; i++) { | 1503 | for (i = 0; i < nwords; i++) { |
1506 | /* check if this word of the free inode summary | 1504 | /* check if this word of the free inode summary |
1507 | * map describes an extent with free inodes. | 1505 | * map describes an extent with free inodes. |
1508 | */ | 1506 | */ |
1509 | if (~inosmap) { | 1507 | if (~inosmap) { |
1510 | /* an extent with free inodes has been | 1508 | /* an extent with free inodes has been |
1511 | * found. determine the extent number | 1509 | * found. determine the extent number |
1512 | * and the inode number within the extent. | 1510 | * and the inode number within the extent. |
1513 | */ | 1511 | */ |
1514 | rem = diFindFree(inosmap, 0); | 1512 | rem = diFindFree(inosmap, 0); |
1515 | extno = (sword << L2EXTSPERSUM) + rem; | 1513 | extno = (sword << L2EXTSPERSUM) + rem; |
1516 | rem = diFindFree(le32_to_cpu(iagp->wmap[extno]), | 1514 | rem = diFindFree(le32_to_cpu(iagp->wmap[extno]), |
1517 | 0); | 1515 | 0); |
1518 | if (rem >= INOSPEREXT) { | 1516 | if (rem >= INOSPEREXT) { |
1519 | IREAD_UNLOCK(ipimap); | 1517 | IREAD_UNLOCK(ipimap); |
1520 | release_metapage(mp); | 1518 | release_metapage(mp); |
1521 | AG_UNLOCK(imap, agno); | 1519 | AG_UNLOCK(imap, agno); |
1522 | jfs_error(ip->i_sb, | 1520 | jfs_error(ip->i_sb, |
1523 | "diAlloc: can't find free bit " | 1521 | "diAlloc: can't find free bit " |
1524 | "in wmap"); | 1522 | "in wmap"); |
1525 | return EIO; | 1523 | return EIO; |
1526 | } | 1524 | } |
1527 | 1525 | ||
1528 | /* determine the inode number within the | 1526 | /* determine the inode number within the |
1529 | * iag and allocate the inode from the | 1527 | * iag and allocate the inode from the |
1530 | * map. | 1528 | * map. |
1531 | */ | 1529 | */ |
1532 | ino = (extno << L2INOSPEREXT) + rem; | 1530 | ino = (extno << L2INOSPEREXT) + rem; |
1533 | rc = diAllocBit(imap, iagp, ino); | 1531 | rc = diAllocBit(imap, iagp, ino); |
1534 | IREAD_UNLOCK(ipimap); | 1532 | IREAD_UNLOCK(ipimap); |
1535 | if (rc) | 1533 | if (rc) |
1536 | assert(rc == -EIO); | 1534 | assert(rc == -EIO); |
1537 | else { | 1535 | else { |
1538 | /* set the results of the allocation | 1536 | /* set the results of the allocation |
1539 | * and write the iag. | 1537 | * and write the iag. |
1540 | */ | 1538 | */ |
1541 | diInitInode(ip, iagno, ino, extno, | 1539 | diInitInode(ip, iagno, ino, extno, |
1542 | iagp); | 1540 | iagp); |
1543 | mark_metapage_dirty(mp); | 1541 | mark_metapage_dirty(mp); |
1544 | } | 1542 | } |
1545 | release_metapage(mp); | 1543 | release_metapage(mp); |
1546 | 1544 | ||
1547 | /* free the AG lock and return. | 1545 | /* free the AG lock and return. |
1548 | */ | 1546 | */ |
1549 | AG_UNLOCK(imap, agno); | 1547 | AG_UNLOCK(imap, agno); |
1550 | return (rc); | 1548 | return (rc); |
1551 | 1549 | ||
1552 | } | 1550 | } |
1553 | 1551 | ||
1554 | /* check if we may allocate an extent of free | 1552 | /* check if we may allocate an extent of free |
1555 | * inodes and whether this word of the free | 1553 | * inodes and whether this word of the free |
1556 | * extents summary map describes a free extent. | 1554 | * extents summary map describes a free extent. |
1557 | */ | 1555 | */ |
1558 | if (addext && ~extsmap) { | 1556 | if (addext && ~extsmap) { |
1559 | /* a free extent has been found. determine | 1557 | /* a free extent has been found. determine |
1560 | * the extent number. | 1558 | * the extent number. |
1561 | */ | 1559 | */ |
1562 | rem = diFindFree(extsmap, 0); | 1560 | rem = diFindFree(extsmap, 0); |
1563 | extno = (sword << L2EXTSPERSUM) + rem; | 1561 | extno = (sword << L2EXTSPERSUM) + rem; |
1564 | 1562 | ||
1565 | /* allocate an extent of free inodes. | 1563 | /* allocate an extent of free inodes. |
1566 | */ | 1564 | */ |
1567 | if ((rc = diNewExt(imap, iagp, extno))) { | 1565 | if ((rc = diNewExt(imap, iagp, extno))) { |
1568 | /* if there is no disk space for a | 1566 | /* if there is no disk space for a |
1569 | * new extent, try to allocate the | 1567 | * new extent, try to allocate the |
1570 | * disk inode from somewhere else. | 1568 | * disk inode from somewhere else. |
1571 | */ | 1569 | */ |
1572 | if (rc == -ENOSPC) | 1570 | if (rc == -ENOSPC) |
1573 | break; | 1571 | break; |
1574 | 1572 | ||
1575 | assert(rc == -EIO); | 1573 | assert(rc == -EIO); |
1576 | } else { | 1574 | } else { |
1577 | /* set the results of the allocation | 1575 | /* set the results of the allocation |
1578 | * and write the iag. | 1576 | * and write the iag. |
1579 | */ | 1577 | */ |
1580 | diInitInode(ip, iagno, | 1578 | diInitInode(ip, iagno, |
1581 | extno << L2INOSPEREXT, | 1579 | extno << L2INOSPEREXT, |
1582 | extno, iagp); | 1580 | extno, iagp); |
1583 | mark_metapage_dirty(mp); | 1581 | mark_metapage_dirty(mp); |
1584 | } | 1582 | } |
1585 | release_metapage(mp); | 1583 | release_metapage(mp); |
1586 | /* free the imap inode & the AG lock & return. | 1584 | /* free the imap inode & the AG lock & return. |
1587 | */ | 1585 | */ |
1588 | IREAD_UNLOCK(ipimap); | 1586 | IREAD_UNLOCK(ipimap); |
1589 | AG_UNLOCK(imap, agno); | 1587 | AG_UNLOCK(imap, agno); |
1590 | return (rc); | 1588 | return (rc); |
1591 | } | 1589 | } |
1592 | 1590 | ||
1593 | /* move on to the next set of summary map words. | 1591 | /* move on to the next set of summary map words. |
1594 | */ | 1592 | */ |
1595 | sword = (sword == SMAPSZ - 1) ? 0 : sword + 1; | 1593 | sword = (sword == SMAPSZ - 1) ? 0 : sword + 1; |
1596 | inosmap = le32_to_cpu(iagp->inosmap[sword]); | 1594 | inosmap = le32_to_cpu(iagp->inosmap[sword]); |
1597 | extsmap = le32_to_cpu(iagp->extsmap[sword]); | 1595 | extsmap = le32_to_cpu(iagp->extsmap[sword]); |
1598 | } | 1596 | } |
1599 | } | 1597 | } |
1600 | /* unlock imap inode */ | 1598 | /* unlock imap inode */ |
1601 | IREAD_UNLOCK(ipimap); | 1599 | IREAD_UNLOCK(ipimap); |
1602 | 1600 | ||
1603 | /* nothing doing in this iag, so release it. */ | 1601 | /* nothing doing in this iag, so release it. */ |
1604 | release_metapage(mp); | 1602 | release_metapage(mp); |
1605 | 1603 | ||
1606 | tryag: | 1604 | tryag: |
1607 | /* | 1605 | /* |
1608 | * try to allocate anywhere within the same AG as the parent inode. | 1606 | * try to allocate anywhere within the same AG as the parent inode. |
1609 | */ | 1607 | */ |
1610 | rc = diAllocAG(imap, agno, dir, ip); | 1608 | rc = diAllocAG(imap, agno, dir, ip); |
1611 | 1609 | ||
1612 | AG_UNLOCK(imap, agno); | 1610 | AG_UNLOCK(imap, agno); |
1613 | 1611 | ||
1614 | if (rc != -ENOSPC) | 1612 | if (rc != -ENOSPC) |
1615 | return (rc); | 1613 | return (rc); |
1616 | 1614 | ||
1617 | /* | 1615 | /* |
1618 | * try to allocate in any AG. | 1616 | * try to allocate in any AG. |
1619 | */ | 1617 | */ |
1620 | return (diAllocAny(imap, agno, dir, ip)); | 1618 | return (diAllocAny(imap, agno, dir, ip)); |
1621 | } | 1619 | } |
1622 | 1620 | ||
1623 | 1621 | ||
1624 | /* | 1622 | /* |
1625 | * NAME: diAllocAG(imap,agno,dir,ip) | 1623 | * NAME: diAllocAG(imap,agno,dir,ip) |
1626 | * | 1624 | * |
1627 | * FUNCTION: allocate a disk inode from the allocation group. | 1625 | * FUNCTION: allocate a disk inode from the allocation group. |
1628 | * | 1626 | * |
1629 | * this routine first determines if a new extent of free | 1627 | * this routine first determines if a new extent of free |
1630 | * inodes should be added for the allocation group, with | 1628 | * inodes should be added for the allocation group, with |
1631 | * the current request satisfied from this extent. if this | 1629 | * the current request satisfied from this extent. if this |
1632 | * is the case, an attempt will be made to do just that. if | 1630 | * is the case, an attempt will be made to do just that. if |
1633 | * this attempt fails or it has been determined that a new | 1631 | * this attempt fails or it has been determined that a new |
1634 | * extent should not be added, an attempt is made to satisfy | 1632 | * extent should not be added, an attempt is made to satisfy |
1635 | * the request by allocating an existing (backed) free inode | 1633 | * the request by allocating an existing (backed) free inode |
1636 | * from the allocation group. | 1634 | * from the allocation group. |
1637 | * | 1635 | * |
1638 | * PRE CONDITION: Already have the AG lock for this AG. | 1636 | * PRE CONDITION: Already have the AG lock for this AG. |
1639 | * | 1637 | * |
1640 | * PARAMETERS: | 1638 | * PARAMETERS: |
1641 | * imap - pointer to inode map control structure. | 1639 | * imap - pointer to inode map control structure. |
1642 | * agno - allocation group to allocate from. | 1640 | * agno - allocation group to allocate from. |
1643 | * dir - 'true' if the new disk inode is for a directory. | 1641 | * dir - 'true' if the new disk inode is for a directory. |
1644 | * ip - pointer to the new inode to be filled in on successful return | 1642 | * ip - pointer to the new inode to be filled in on successful return |
1645 | * with the disk inode number allocated, its extent address | 1643 | * with the disk inode number allocated, its extent address |
1646 | * and the start of the ag. | 1644 | * and the start of the ag. |
1647 | * | 1645 | * |
1648 | * RETURN VALUES: | 1646 | * RETURN VALUES: |
1649 | * 0 - success. | 1647 | * 0 - success. |
1650 | * -ENOSPC - insufficient disk resources. | 1648 | * -ENOSPC - insufficient disk resources. |
1651 | * -EIO - i/o error. | 1649 | * -EIO - i/o error. |
1652 | */ | 1650 | */ |
1653 | static int | 1651 | static int |
1654 | diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip) | 1652 | diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip) |
1655 | { | 1653 | { |
1656 | int rc, addext, numfree, numinos; | 1654 | int rc, addext, numfree, numinos; |
1657 | 1655 | ||
1658 | /* get the number of free and the number of backed disk | 1656 | /* get the number of free and the number of backed disk |
1659 | * inodes currently within the ag. | 1657 | * inodes currently within the ag. |
1660 | */ | 1658 | */ |
1661 | numfree = imap->im_agctl[agno].numfree; | 1659 | numfree = imap->im_agctl[agno].numfree; |
1662 | numinos = imap->im_agctl[agno].numinos; | 1660 | numinos = imap->im_agctl[agno].numinos; |
1663 | 1661 | ||
1664 | if (numfree > numinos) { | 1662 | if (numfree > numinos) { |
1665 | jfs_error(ip->i_sb, "diAllocAG: numfree > numinos"); | 1663 | jfs_error(ip->i_sb, "diAllocAG: numfree > numinos"); |
1666 | return -EIO; | 1664 | return -EIO; |
1667 | } | 1665 | } |
1668 | 1666 | ||
1669 | /* determine if we should allocate a new extent of free inodes | 1667 | /* determine if we should allocate a new extent of free inodes |
1670 | * within the ag: for directory inodes, add a new extent | 1668 | * within the ag: for directory inodes, add a new extent |
1671 | * if there are a small number of free inodes or number of free | 1669 | * if there are a small number of free inodes or number of free |
1672 | * inodes is a small percentage of the number of backed inodes. | 1670 | * inodes is a small percentage of the number of backed inodes. |
1673 | */ | 1671 | */ |
1674 | if (dir) | 1672 | if (dir) |
1675 | addext = (numfree < 64 || | 1673 | addext = (numfree < 64 || |
1676 | (numfree < 256 | 1674 | (numfree < 256 |
1677 | && ((numfree * 100) / numinos) <= 20)); | 1675 | && ((numfree * 100) / numinos) <= 20)); |
1678 | else | 1676 | else |
1679 | addext = (numfree == 0); | 1677 | addext = (numfree == 0); |
1680 | 1678 | ||
1681 | /* | 1679 | /* |
1682 | * try to allocate a new extent of free inodes. | 1680 | * try to allocate a new extent of free inodes. |
1683 | */ | 1681 | */ |
1684 | if (addext) { | 1682 | if (addext) { |
1685 | /* if free space is not avaliable for this new extent, try | 1683 | /* if free space is not avaliable for this new extent, try |
1686 | * below to allocate a free and existing (already backed) | 1684 | * below to allocate a free and existing (already backed) |
1687 | * inode from the ag. | 1685 | * inode from the ag. |
1688 | */ | 1686 | */ |
1689 | if ((rc = diAllocExt(imap, agno, ip)) != -ENOSPC) | 1687 | if ((rc = diAllocExt(imap, agno, ip)) != -ENOSPC) |
1690 | return (rc); | 1688 | return (rc); |
1691 | } | 1689 | } |
1692 | 1690 | ||
1693 | /* | 1691 | /* |
1694 | * try to allocate an existing free inode from the ag. | 1692 | * try to allocate an existing free inode from the ag. |
1695 | */ | 1693 | */ |
1696 | return (diAllocIno(imap, agno, ip)); | 1694 | return (diAllocIno(imap, agno, ip)); |
1697 | } | 1695 | } |
1698 | 1696 | ||
1699 | 1697 | ||
1700 | /* | 1698 | /* |
1701 | * NAME: diAllocAny(imap,agno,dir,iap) | 1699 | * NAME: diAllocAny(imap,agno,dir,iap) |
1702 | * | 1700 | * |
1703 | * FUNCTION: allocate a disk inode from any other allocation group. | 1701 | * FUNCTION: allocate a disk inode from any other allocation group. |
1704 | * | 1702 | * |
1705 | * this routine is called when an allocation attempt within | 1703 | * this routine is called when an allocation attempt within |
1706 | * the primary allocation group has failed. if attempts to | 1704 | * the primary allocation group has failed. if attempts to |
1707 | * allocate an inode from any allocation group other than the | 1705 | * allocate an inode from any allocation group other than the |
1708 | * specified primary group. | 1706 | * specified primary group. |
1709 | * | 1707 | * |
1710 | * PARAMETERS: | 1708 | * PARAMETERS: |
1711 | * imap - pointer to inode map control structure. | 1709 | * imap - pointer to inode map control structure. |
1712 | * agno - primary allocation group (to avoid). | 1710 | * agno - primary allocation group (to avoid). |
1713 | * dir - 'true' if the new disk inode is for a directory. | 1711 | * dir - 'true' if the new disk inode is for a directory. |
1714 | * ip - pointer to a new inode to be filled in on successful return | 1712 | * ip - pointer to a new inode to be filled in on successful return |
1715 | * with the disk inode number allocated, its extent address | 1713 | * with the disk inode number allocated, its extent address |
1716 | * and the start of the ag. | 1714 | * and the start of the ag. |
1717 | * | 1715 | * |
1718 | * RETURN VALUES: | 1716 | * RETURN VALUES: |
1719 | * 0 - success. | 1717 | * 0 - success. |
1720 | * -ENOSPC - insufficient disk resources. | 1718 | * -ENOSPC - insufficient disk resources. |
1721 | * -EIO - i/o error. | 1719 | * -EIO - i/o error. |
1722 | */ | 1720 | */ |
1723 | static int | 1721 | static int |
1724 | diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip) | 1722 | diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip) |
1725 | { | 1723 | { |
1726 | int ag, rc; | 1724 | int ag, rc; |
1727 | int maxag = JFS_SBI(imap->im_ipimap->i_sb)->bmap->db_maxag; | 1725 | int maxag = JFS_SBI(imap->im_ipimap->i_sb)->bmap->db_maxag; |
1728 | 1726 | ||
1729 | 1727 | ||
1730 | /* try to allocate from the ags following agno up to | 1728 | /* try to allocate from the ags following agno up to |
1731 | * the maximum ag number. | 1729 | * the maximum ag number. |
1732 | */ | 1730 | */ |
1733 | for (ag = agno + 1; ag <= maxag; ag++) { | 1731 | for (ag = agno + 1; ag <= maxag; ag++) { |
1734 | AG_LOCK(imap, ag); | 1732 | AG_LOCK(imap, ag); |
1735 | 1733 | ||
1736 | rc = diAllocAG(imap, ag, dir, ip); | 1734 | rc = diAllocAG(imap, ag, dir, ip); |
1737 | 1735 | ||
1738 | AG_UNLOCK(imap, ag); | 1736 | AG_UNLOCK(imap, ag); |
1739 | 1737 | ||
1740 | if (rc != -ENOSPC) | 1738 | if (rc != -ENOSPC) |
1741 | return (rc); | 1739 | return (rc); |
1742 | } | 1740 | } |
1743 | 1741 | ||
1744 | /* try to allocate from the ags in front of agno. | 1742 | /* try to allocate from the ags in front of agno. |
1745 | */ | 1743 | */ |
1746 | for (ag = 0; ag < agno; ag++) { | 1744 | for (ag = 0; ag < agno; ag++) { |
1747 | AG_LOCK(imap, ag); | 1745 | AG_LOCK(imap, ag); |
1748 | 1746 | ||
1749 | rc = diAllocAG(imap, ag, dir, ip); | 1747 | rc = diAllocAG(imap, ag, dir, ip); |
1750 | 1748 | ||
1751 | AG_UNLOCK(imap, ag); | 1749 | AG_UNLOCK(imap, ag); |
1752 | 1750 | ||
1753 | if (rc != -ENOSPC) | 1751 | if (rc != -ENOSPC) |
1754 | return (rc); | 1752 | return (rc); |
1755 | } | 1753 | } |
1756 | 1754 | ||
1757 | /* no free disk inodes. | 1755 | /* no free disk inodes. |
1758 | */ | 1756 | */ |
1759 | return -ENOSPC; | 1757 | return -ENOSPC; |
1760 | } | 1758 | } |
1761 | 1759 | ||
1762 | 1760 | ||
1763 | /* | 1761 | /* |
1764 | * NAME: diAllocIno(imap,agno,ip) | 1762 | * NAME: diAllocIno(imap,agno,ip) |
1765 | * | 1763 | * |
1766 | * FUNCTION: allocate a disk inode from the allocation group's free | 1764 | * FUNCTION: allocate a disk inode from the allocation group's free |
1767 | * inode list, returning an error if this free list is | 1765 | * inode list, returning an error if this free list is |
1768 | * empty (i.e. no iags on the list). | 1766 | * empty (i.e. no iags on the list). |
1769 | * | 1767 | * |
1770 | * allocation occurs from the first iag on the list using | 1768 | * allocation occurs from the first iag on the list using |
1771 | * the iag's free inode summary map to find the leftmost | 1769 | * the iag's free inode summary map to find the leftmost |
1772 | * free inode in the iag. | 1770 | * free inode in the iag. |
1773 | * | 1771 | * |
1774 | * PRE CONDITION: Already have AG lock for this AG. | 1772 | * PRE CONDITION: Already have AG lock for this AG. |
1775 | * | 1773 | * |
1776 | * PARAMETERS: | 1774 | * PARAMETERS: |
1777 | * imap - pointer to inode map control structure. | 1775 | * imap - pointer to inode map control structure. |
1778 | * agno - allocation group. | 1776 | * agno - allocation group. |
1779 | * ip - pointer to new inode to be filled in on successful return | 1777 | * ip - pointer to new inode to be filled in on successful return |
1780 | * with the disk inode number allocated, its extent address | 1778 | * with the disk inode number allocated, its extent address |
1781 | * and the start of the ag. | 1779 | * and the start of the ag. |
1782 | * | 1780 | * |
1783 | * RETURN VALUES: | 1781 | * RETURN VALUES: |
1784 | * 0 - success. | 1782 | * 0 - success. |
1785 | * -ENOSPC - insufficient disk resources. | 1783 | * -ENOSPC - insufficient disk resources. |
1786 | * -EIO - i/o error. | 1784 | * -EIO - i/o error. |
1787 | */ | 1785 | */ |
1788 | static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) | 1786 | static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) |
1789 | { | 1787 | { |
1790 | int iagno, ino, rc, rem, extno, sword; | 1788 | int iagno, ino, rc, rem, extno, sword; |
1791 | struct metapage *mp; | 1789 | struct metapage *mp; |
1792 | struct iag *iagp; | 1790 | struct iag *iagp; |
1793 | 1791 | ||
1794 | /* check if there are iags on the ag's free inode list. | 1792 | /* check if there are iags on the ag's free inode list. |
1795 | */ | 1793 | */ |
1796 | if ((iagno = imap->im_agctl[agno].inofree) < 0) | 1794 | if ((iagno = imap->im_agctl[agno].inofree) < 0) |
1797 | return -ENOSPC; | 1795 | return -ENOSPC; |
1798 | 1796 | ||
1799 | /* obtain read lock on imap inode */ | 1797 | /* obtain read lock on imap inode */ |
1800 | IREAD_LOCK(imap->im_ipimap, RDWRLOCK_IMAP); | 1798 | IREAD_LOCK(imap->im_ipimap, RDWRLOCK_IMAP); |
1801 | 1799 | ||
1802 | /* read the iag at the head of the list. | 1800 | /* read the iag at the head of the list. |
1803 | */ | 1801 | */ |
1804 | if ((rc = diIAGRead(imap, iagno, &mp))) { | 1802 | if ((rc = diIAGRead(imap, iagno, &mp))) { |
1805 | IREAD_UNLOCK(imap->im_ipimap); | 1803 | IREAD_UNLOCK(imap->im_ipimap); |
1806 | return (rc); | 1804 | return (rc); |
1807 | } | 1805 | } |
1808 | iagp = (struct iag *) mp->data; | 1806 | iagp = (struct iag *) mp->data; |
1809 | 1807 | ||
1810 | /* better be free inodes in this iag if it is on the | 1808 | /* better be free inodes in this iag if it is on the |
1811 | * list. | 1809 | * list. |
1812 | */ | 1810 | */ |
1813 | if (!iagp->nfreeinos) { | 1811 | if (!iagp->nfreeinos) { |
1814 | IREAD_UNLOCK(imap->im_ipimap); | 1812 | IREAD_UNLOCK(imap->im_ipimap); |
1815 | release_metapage(mp); | 1813 | release_metapage(mp); |
1816 | jfs_error(ip->i_sb, | 1814 | jfs_error(ip->i_sb, |
1817 | "diAllocIno: nfreeinos = 0, but iag on freelist"); | 1815 | "diAllocIno: nfreeinos = 0, but iag on freelist"); |
1818 | return -EIO; | 1816 | return -EIO; |
1819 | } | 1817 | } |
1820 | 1818 | ||
1821 | /* scan the free inode summary map to find an extent | 1819 | /* scan the free inode summary map to find an extent |
1822 | * with free inodes. | 1820 | * with free inodes. |
1823 | */ | 1821 | */ |
1824 | for (sword = 0;; sword++) { | 1822 | for (sword = 0;; sword++) { |
1825 | if (sword >= SMAPSZ) { | 1823 | if (sword >= SMAPSZ) { |
1826 | IREAD_UNLOCK(imap->im_ipimap); | 1824 | IREAD_UNLOCK(imap->im_ipimap); |
1827 | release_metapage(mp); | 1825 | release_metapage(mp); |
1828 | jfs_error(ip->i_sb, | 1826 | jfs_error(ip->i_sb, |
1829 | "diAllocIno: free inode not found in summary map"); | 1827 | "diAllocIno: free inode not found in summary map"); |
1830 | return -EIO; | 1828 | return -EIO; |
1831 | } | 1829 | } |
1832 | 1830 | ||
1833 | if (~iagp->inosmap[sword]) | 1831 | if (~iagp->inosmap[sword]) |
1834 | break; | 1832 | break; |
1835 | } | 1833 | } |
1836 | 1834 | ||
1837 | /* found a extent with free inodes. determine | 1835 | /* found a extent with free inodes. determine |
1838 | * the extent number. | 1836 | * the extent number. |
1839 | */ | 1837 | */ |
1840 | rem = diFindFree(le32_to_cpu(iagp->inosmap[sword]), 0); | 1838 | rem = diFindFree(le32_to_cpu(iagp->inosmap[sword]), 0); |
1841 | if (rem >= EXTSPERSUM) { | 1839 | if (rem >= EXTSPERSUM) { |
1842 | IREAD_UNLOCK(imap->im_ipimap); | 1840 | IREAD_UNLOCK(imap->im_ipimap); |
1843 | release_metapage(mp); | 1841 | release_metapage(mp); |
1844 | jfs_error(ip->i_sb, "diAllocIno: no free extent found"); | 1842 | jfs_error(ip->i_sb, "diAllocIno: no free extent found"); |
1845 | return -EIO; | 1843 | return -EIO; |
1846 | } | 1844 | } |
1847 | extno = (sword << L2EXTSPERSUM) + rem; | 1845 | extno = (sword << L2EXTSPERSUM) + rem; |
1848 | 1846 | ||
1849 | /* find the first free inode in the extent. | 1847 | /* find the first free inode in the extent. |
1850 | */ | 1848 | */ |
1851 | rem = diFindFree(le32_to_cpu(iagp->wmap[extno]), 0); | 1849 | rem = diFindFree(le32_to_cpu(iagp->wmap[extno]), 0); |
1852 | if (rem >= INOSPEREXT) { | 1850 | if (rem >= INOSPEREXT) { |
1853 | IREAD_UNLOCK(imap->im_ipimap); | 1851 | IREAD_UNLOCK(imap->im_ipimap); |
1854 | release_metapage(mp); | 1852 | release_metapage(mp); |
1855 | jfs_error(ip->i_sb, "diAllocIno: free inode not found"); | 1853 | jfs_error(ip->i_sb, "diAllocIno: free inode not found"); |
1856 | return -EIO; | 1854 | return -EIO; |
1857 | } | 1855 | } |
1858 | 1856 | ||
1859 | /* compute the inode number within the iag. | 1857 | /* compute the inode number within the iag. |
1860 | */ | 1858 | */ |
1861 | ino = (extno << L2INOSPEREXT) + rem; | 1859 | ino = (extno << L2INOSPEREXT) + rem; |
1862 | 1860 | ||
1863 | /* allocate the inode. | 1861 | /* allocate the inode. |
1864 | */ | 1862 | */ |
1865 | rc = diAllocBit(imap, iagp, ino); | 1863 | rc = diAllocBit(imap, iagp, ino); |
1866 | IREAD_UNLOCK(imap->im_ipimap); | 1864 | IREAD_UNLOCK(imap->im_ipimap); |
1867 | if (rc) { | 1865 | if (rc) { |
1868 | release_metapage(mp); | 1866 | release_metapage(mp); |
1869 | return (rc); | 1867 | return (rc); |
1870 | } | 1868 | } |
1871 | 1869 | ||
1872 | /* set the results of the allocation and write the iag. | 1870 | /* set the results of the allocation and write the iag. |
1873 | */ | 1871 | */ |
1874 | diInitInode(ip, iagno, ino, extno, iagp); | 1872 | diInitInode(ip, iagno, ino, extno, iagp); |
1875 | write_metapage(mp); | 1873 | write_metapage(mp); |
1876 | 1874 | ||
1877 | return (0); | 1875 | return (0); |
1878 | } | 1876 | } |
1879 | 1877 | ||
1880 | 1878 | ||
1881 | /* | 1879 | /* |
1882 | * NAME: diAllocExt(imap,agno,ip) | 1880 | * NAME: diAllocExt(imap,agno,ip) |
1883 | * | 1881 | * |
1884 | * FUNCTION: add a new extent of free inodes to an iag, allocating | 1882 | * FUNCTION: add a new extent of free inodes to an iag, allocating |
1885 | * an inode from this extent to satisfy the current allocation | 1883 | * an inode from this extent to satisfy the current allocation |
1886 | * request. | 1884 | * request. |
1887 | * | 1885 | * |
1888 | * this routine first tries to find an existing iag with free | 1886 | * this routine first tries to find an existing iag with free |
1889 | * extents through the ag free extent list. if list is not | 1887 | * extents through the ag free extent list. if list is not |
1890 | * empty, the head of the list will be selected as the home | 1888 | * empty, the head of the list will be selected as the home |
1891 | * of the new extent of free inodes. otherwise (the list is | 1889 | * of the new extent of free inodes. otherwise (the list is |
1892 | * empty), a new iag will be allocated for the ag to contain | 1890 | * empty), a new iag will be allocated for the ag to contain |
1893 | * the extent. | 1891 | * the extent. |
1894 | * | 1892 | * |
1895 | * once an iag has been selected, the free extent summary map | 1893 | * once an iag has been selected, the free extent summary map |
1896 | * is used to locate a free extent within the iag and diNewExt() | 1894 | * is used to locate a free extent within the iag and diNewExt() |
1897 | * is called to initialize the extent, with initialization | 1895 | * is called to initialize the extent, with initialization |
1898 | * including the allocation of the first inode of the extent | 1896 | * including the allocation of the first inode of the extent |
1899 | * for the purpose of satisfying this request. | 1897 | * for the purpose of satisfying this request. |
1900 | * | 1898 | * |
1901 | * PARAMETERS: | 1899 | * PARAMETERS: |
1902 | * imap - pointer to inode map control structure. | 1900 | * imap - pointer to inode map control structure. |
1903 | * agno - allocation group number. | 1901 | * agno - allocation group number. |
1904 | * ip - pointer to new inode to be filled in on successful return | 1902 | * ip - pointer to new inode to be filled in on successful return |
1905 | * with the disk inode number allocated, its extent address | 1903 | * with the disk inode number allocated, its extent address |
1906 | * and the start of the ag. | 1904 | * and the start of the ag. |
1907 | * | 1905 | * |
1908 | * RETURN VALUES: | 1906 | * RETURN VALUES: |
1909 | * 0 - success. | 1907 | * 0 - success. |
1910 | * -ENOSPC - insufficient disk resources. | 1908 | * -ENOSPC - insufficient disk resources. |
1911 | * -EIO - i/o error. | 1909 | * -EIO - i/o error. |
1912 | */ | 1910 | */ |
1913 | static int diAllocExt(struct inomap * imap, int agno, struct inode *ip) | 1911 | static int diAllocExt(struct inomap * imap, int agno, struct inode *ip) |
1914 | { | 1912 | { |
1915 | int rem, iagno, sword, extno, rc; | 1913 | int rem, iagno, sword, extno, rc; |
1916 | struct metapage *mp; | 1914 | struct metapage *mp; |
1917 | struct iag *iagp; | 1915 | struct iag *iagp; |
1918 | 1916 | ||
1919 | /* check if the ag has any iags with free extents. if not, | 1917 | /* check if the ag has any iags with free extents. if not, |
1920 | * allocate a new iag for the ag. | 1918 | * allocate a new iag for the ag. |
1921 | */ | 1919 | */ |
1922 | if ((iagno = imap->im_agctl[agno].extfree) < 0) { | 1920 | if ((iagno = imap->im_agctl[agno].extfree) < 0) { |
1923 | /* If successful, diNewIAG will obtain the read lock on the | 1921 | /* If successful, diNewIAG will obtain the read lock on the |
1924 | * imap inode. | 1922 | * imap inode. |
1925 | */ | 1923 | */ |
1926 | if ((rc = diNewIAG(imap, &iagno, agno, &mp))) { | 1924 | if ((rc = diNewIAG(imap, &iagno, agno, &mp))) { |
1927 | return (rc); | 1925 | return (rc); |
1928 | } | 1926 | } |
1929 | iagp = (struct iag *) mp->data; | 1927 | iagp = (struct iag *) mp->data; |
1930 | 1928 | ||
1931 | /* set the ag number if this a brand new iag | 1929 | /* set the ag number if this a brand new iag |
1932 | */ | 1930 | */ |
1933 | iagp->agstart = | 1931 | iagp->agstart = |
1934 | cpu_to_le64(AGTOBLK(agno, imap->im_ipimap)); | 1932 | cpu_to_le64(AGTOBLK(agno, imap->im_ipimap)); |
1935 | } else { | 1933 | } else { |
1936 | /* read the iag. | 1934 | /* read the iag. |
1937 | */ | 1935 | */ |
1938 | IREAD_LOCK(imap->im_ipimap, RDWRLOCK_IMAP); | 1936 | IREAD_LOCK(imap->im_ipimap, RDWRLOCK_IMAP); |
1939 | if ((rc = diIAGRead(imap, iagno, &mp))) { | 1937 | if ((rc = diIAGRead(imap, iagno, &mp))) { |
1940 | IREAD_UNLOCK(imap->im_ipimap); | 1938 | IREAD_UNLOCK(imap->im_ipimap); |
1941 | jfs_error(ip->i_sb, "diAllocExt: error reading iag"); | 1939 | jfs_error(ip->i_sb, "diAllocExt: error reading iag"); |
1942 | return rc; | 1940 | return rc; |
1943 | } | 1941 | } |
1944 | iagp = (struct iag *) mp->data; | 1942 | iagp = (struct iag *) mp->data; |
1945 | } | 1943 | } |
1946 | 1944 | ||
1947 | /* using the free extent summary map, find a free extent. | 1945 | /* using the free extent summary map, find a free extent. |
1948 | */ | 1946 | */ |
1949 | for (sword = 0;; sword++) { | 1947 | for (sword = 0;; sword++) { |
1950 | if (sword >= SMAPSZ) { | 1948 | if (sword >= SMAPSZ) { |
1951 | release_metapage(mp); | 1949 | release_metapage(mp); |
1952 | IREAD_UNLOCK(imap->im_ipimap); | 1950 | IREAD_UNLOCK(imap->im_ipimap); |
1953 | jfs_error(ip->i_sb, | 1951 | jfs_error(ip->i_sb, |
1954 | "diAllocExt: free ext summary map not found"); | 1952 | "diAllocExt: free ext summary map not found"); |
1955 | return -EIO; | 1953 | return -EIO; |
1956 | } | 1954 | } |
1957 | if (~iagp->extsmap[sword]) | 1955 | if (~iagp->extsmap[sword]) |
1958 | break; | 1956 | break; |
1959 | } | 1957 | } |
1960 | 1958 | ||
1961 | /* determine the extent number of the free extent. | 1959 | /* determine the extent number of the free extent. |
1962 | */ | 1960 | */ |
1963 | rem = diFindFree(le32_to_cpu(iagp->extsmap[sword]), 0); | 1961 | rem = diFindFree(le32_to_cpu(iagp->extsmap[sword]), 0); |
1964 | if (rem >= EXTSPERSUM) { | 1962 | if (rem >= EXTSPERSUM) { |
1965 | release_metapage(mp); | 1963 | release_metapage(mp); |
1966 | IREAD_UNLOCK(imap->im_ipimap); | 1964 | IREAD_UNLOCK(imap->im_ipimap); |
1967 | jfs_error(ip->i_sb, "diAllocExt: free extent not found"); | 1965 | jfs_error(ip->i_sb, "diAllocExt: free extent not found"); |
1968 | return -EIO; | 1966 | return -EIO; |
1969 | } | 1967 | } |
1970 | extno = (sword << L2EXTSPERSUM) + rem; | 1968 | extno = (sword << L2EXTSPERSUM) + rem; |
1971 | 1969 | ||
1972 | /* initialize the new extent. | 1970 | /* initialize the new extent. |
1973 | */ | 1971 | */ |
1974 | rc = diNewExt(imap, iagp, extno); | 1972 | rc = diNewExt(imap, iagp, extno); |
1975 | IREAD_UNLOCK(imap->im_ipimap); | 1973 | IREAD_UNLOCK(imap->im_ipimap); |
1976 | if (rc) { | 1974 | if (rc) { |
1977 | /* something bad happened. if a new iag was allocated, | 1975 | /* something bad happened. if a new iag was allocated, |
1978 | * place it back on the inode map's iag free list, and | 1976 | * place it back on the inode map's iag free list, and |
1979 | * clear the ag number information. | 1977 | * clear the ag number information. |
1980 | */ | 1978 | */ |
1981 | if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { | 1979 | if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { |
1982 | IAGFREE_LOCK(imap); | 1980 | IAGFREE_LOCK(imap); |
1983 | iagp->iagfree = cpu_to_le32(imap->im_freeiag); | 1981 | iagp->iagfree = cpu_to_le32(imap->im_freeiag); |
1984 | imap->im_freeiag = iagno; | 1982 | imap->im_freeiag = iagno; |
1985 | IAGFREE_UNLOCK(imap); | 1983 | IAGFREE_UNLOCK(imap); |
1986 | } | 1984 | } |
1987 | write_metapage(mp); | 1985 | write_metapage(mp); |
1988 | return (rc); | 1986 | return (rc); |
1989 | } | 1987 | } |
1990 | 1988 | ||
1991 | /* set the results of the allocation and write the iag. | 1989 | /* set the results of the allocation and write the iag. |
1992 | */ | 1990 | */ |
1993 | diInitInode(ip, iagno, extno << L2INOSPEREXT, extno, iagp); | 1991 | diInitInode(ip, iagno, extno << L2INOSPEREXT, extno, iagp); |
1994 | 1992 | ||
1995 | write_metapage(mp); | 1993 | write_metapage(mp); |
1996 | 1994 | ||
1997 | return (0); | 1995 | return (0); |
1998 | } | 1996 | } |
1999 | 1997 | ||
2000 | 1998 | ||
2001 | /* | 1999 | /* |
2002 | * NAME: diAllocBit(imap,iagp,ino) | 2000 | * NAME: diAllocBit(imap,iagp,ino) |
2003 | * | 2001 | * |
2004 | * FUNCTION: allocate a backed inode from an iag. | 2002 | * FUNCTION: allocate a backed inode from an iag. |
2005 | * | 2003 | * |
2006 | * this routine performs the mechanics of allocating a | 2004 | * this routine performs the mechanics of allocating a |
2007 | * specified inode from a backed extent. | 2005 | * specified inode from a backed extent. |
2008 | * | 2006 | * |
2009 | * if the inode to be allocated represents the last free | 2007 | * if the inode to be allocated represents the last free |
2010 | * inode within the iag, the iag will be removed from the | 2008 | * inode within the iag, the iag will be removed from the |
2011 | * ag free inode list. | 2009 | * ag free inode list. |
2012 | * | 2010 | * |
2013 | * a careful update approach is used to provide consistency | 2011 | * a careful update approach is used to provide consistency |
2014 | * in the face of updates to multiple buffers. under this | 2012 | * in the face of updates to multiple buffers. under this |
2015 | * approach, all required buffers are obtained before making | 2013 | * approach, all required buffers are obtained before making |
2016 | * any updates and are held all are updates are complete. | 2014 | * any updates and are held all are updates are complete. |
2017 | * | 2015 | * |
2018 | * PRE CONDITION: Already have buffer lock on iagp. Already have AG lock on | 2016 | * PRE CONDITION: Already have buffer lock on iagp. Already have AG lock on |
2019 | * this AG. Must have read lock on imap inode. | 2017 | * this AG. Must have read lock on imap inode. |
2020 | * | 2018 | * |
2021 | * PARAMETERS: | 2019 | * PARAMETERS: |
2022 | * imap - pointer to inode map control structure. | 2020 | * imap - pointer to inode map control structure. |
2023 | * iagp - pointer to iag. | 2021 | * iagp - pointer to iag. |
2024 | * ino - inode number to be allocated within the iag. | 2022 | * ino - inode number to be allocated within the iag. |
2025 | * | 2023 | * |
2026 | * RETURN VALUES: | 2024 | * RETURN VALUES: |
2027 | * 0 - success. | 2025 | * 0 - success. |
2028 | * -ENOSPC - insufficient disk resources. | 2026 | * -ENOSPC - insufficient disk resources. |
2029 | * -EIO - i/o error. | 2027 | * -EIO - i/o error. |
2030 | */ | 2028 | */ |
2031 | static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino) | 2029 | static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino) |
2032 | { | 2030 | { |
2033 | int extno, bitno, agno, sword, rc; | 2031 | int extno, bitno, agno, sword, rc; |
2034 | struct metapage *amp = NULL, *bmp = NULL; | 2032 | struct metapage *amp = NULL, *bmp = NULL; |
2035 | struct iag *aiagp = NULL, *biagp = NULL; | 2033 | struct iag *aiagp = NULL, *biagp = NULL; |
2036 | u32 mask; | 2034 | u32 mask; |
2037 | 2035 | ||
2038 | /* check if this is the last free inode within the iag. | 2036 | /* check if this is the last free inode within the iag. |
2039 | * if so, it will have to be removed from the ag free | 2037 | * if so, it will have to be removed from the ag free |
2040 | * inode list, so get the iags preceeding and following | 2038 | * inode list, so get the iags preceeding and following |
2041 | * it on the list. | 2039 | * it on the list. |
2042 | */ | 2040 | */ |
2043 | if (iagp->nfreeinos == cpu_to_le32(1)) { | 2041 | if (iagp->nfreeinos == cpu_to_le32(1)) { |
2044 | if ((int) le32_to_cpu(iagp->inofreefwd) >= 0) { | 2042 | if ((int) le32_to_cpu(iagp->inofreefwd) >= 0) { |
2045 | if ((rc = | 2043 | if ((rc = |
2046 | diIAGRead(imap, le32_to_cpu(iagp->inofreefwd), | 2044 | diIAGRead(imap, le32_to_cpu(iagp->inofreefwd), |
2047 | &))) | 2045 | &))) |
2048 | return (rc); | 2046 | return (rc); |
2049 | aiagp = (struct iag *) amp->data; | 2047 | aiagp = (struct iag *) amp->data; |
2050 | } | 2048 | } |
2051 | 2049 | ||
2052 | if ((int) le32_to_cpu(iagp->inofreeback) >= 0) { | 2050 | if ((int) le32_to_cpu(iagp->inofreeback) >= 0) { |
2053 | if ((rc = | 2051 | if ((rc = |
2054 | diIAGRead(imap, | 2052 | diIAGRead(imap, |
2055 | le32_to_cpu(iagp->inofreeback), | 2053 | le32_to_cpu(iagp->inofreeback), |
2056 | &bmp))) { | 2054 | &bmp))) { |
2057 | if (amp) | 2055 | if (amp) |
2058 | release_metapage(amp); | 2056 | release_metapage(amp); |
2059 | return (rc); | 2057 | return (rc); |
2060 | } | 2058 | } |
2061 | biagp = (struct iag *) bmp->data; | 2059 | biagp = (struct iag *) bmp->data; |
2062 | } | 2060 | } |
2063 | } | 2061 | } |
2064 | 2062 | ||
2065 | /* get the ag number, extent number, inode number within | 2063 | /* get the ag number, extent number, inode number within |
2066 | * the extent. | 2064 | * the extent. |
2067 | */ | 2065 | */ |
2068 | agno = BLKTOAG(le64_to_cpu(iagp->agstart), JFS_SBI(imap->im_ipimap->i_sb)); | 2066 | agno = BLKTOAG(le64_to_cpu(iagp->agstart), JFS_SBI(imap->im_ipimap->i_sb)); |
2069 | extno = ino >> L2INOSPEREXT; | 2067 | extno = ino >> L2INOSPEREXT; |
2070 | bitno = ino & (INOSPEREXT - 1); | 2068 | bitno = ino & (INOSPEREXT - 1); |
2071 | 2069 | ||
2072 | /* compute the mask for setting the map. | 2070 | /* compute the mask for setting the map. |
2073 | */ | 2071 | */ |
2074 | mask = HIGHORDER >> bitno; | 2072 | mask = HIGHORDER >> bitno; |
2075 | 2073 | ||
2076 | /* the inode should be free and backed. | 2074 | /* the inode should be free and backed. |
2077 | */ | 2075 | */ |
2078 | if (((le32_to_cpu(iagp->pmap[extno]) & mask) != 0) || | 2076 | if (((le32_to_cpu(iagp->pmap[extno]) & mask) != 0) || |
2079 | ((le32_to_cpu(iagp->wmap[extno]) & mask) != 0) || | 2077 | ((le32_to_cpu(iagp->wmap[extno]) & mask) != 0) || |
2080 | (addressPXD(&iagp->inoext[extno]) == 0)) { | 2078 | (addressPXD(&iagp->inoext[extno]) == 0)) { |
2081 | if (amp) | 2079 | if (amp) |
2082 | release_metapage(amp); | 2080 | release_metapage(amp); |
2083 | if (bmp) | 2081 | if (bmp) |
2084 | release_metapage(bmp); | 2082 | release_metapage(bmp); |
2085 | 2083 | ||
2086 | jfs_error(imap->im_ipimap->i_sb, | 2084 | jfs_error(imap->im_ipimap->i_sb, |
2087 | "diAllocBit: iag inconsistent"); | 2085 | "diAllocBit: iag inconsistent"); |
2088 | return -EIO; | 2086 | return -EIO; |
2089 | } | 2087 | } |
2090 | 2088 | ||
2091 | /* mark the inode as allocated in the working map. | 2089 | /* mark the inode as allocated in the working map. |
2092 | */ | 2090 | */ |
2093 | iagp->wmap[extno] |= cpu_to_le32(mask); | 2091 | iagp->wmap[extno] |= cpu_to_le32(mask); |
2094 | 2092 | ||
2095 | /* check if all inodes within the extent are now | 2093 | /* check if all inodes within the extent are now |
2096 | * allocated. if so, update the free inode summary | 2094 | * allocated. if so, update the free inode summary |
2097 | * map to reflect this. | 2095 | * map to reflect this. |
2098 | */ | 2096 | */ |
2099 | if (iagp->wmap[extno] == cpu_to_le32(ONES)) { | 2097 | if (iagp->wmap[extno] == cpu_to_le32(ONES)) { |
2100 | sword = extno >> L2EXTSPERSUM; | 2098 | sword = extno >> L2EXTSPERSUM; |
2101 | bitno = extno & (EXTSPERSUM - 1); | 2099 | bitno = extno & (EXTSPERSUM - 1); |
2102 | iagp->inosmap[sword] |= cpu_to_le32(HIGHORDER >> bitno); | 2100 | iagp->inosmap[sword] |= cpu_to_le32(HIGHORDER >> bitno); |
2103 | } | 2101 | } |
2104 | 2102 | ||
2105 | /* if this was the last free inode in the iag, remove the | 2103 | /* if this was the last free inode in the iag, remove the |
2106 | * iag from the ag free inode list. | 2104 | * iag from the ag free inode list. |
2107 | */ | 2105 | */ |
2108 | if (iagp->nfreeinos == cpu_to_le32(1)) { | 2106 | if (iagp->nfreeinos == cpu_to_le32(1)) { |
2109 | if (amp) { | 2107 | if (amp) { |
2110 | aiagp->inofreeback = iagp->inofreeback; | 2108 | aiagp->inofreeback = iagp->inofreeback; |
2111 | write_metapage(amp); | 2109 | write_metapage(amp); |
2112 | } | 2110 | } |
2113 | 2111 | ||
2114 | if (bmp) { | 2112 | if (bmp) { |
2115 | biagp->inofreefwd = iagp->inofreefwd; | 2113 | biagp->inofreefwd = iagp->inofreefwd; |
2116 | write_metapage(bmp); | 2114 | write_metapage(bmp); |
2117 | } else { | 2115 | } else { |
2118 | imap->im_agctl[agno].inofree = | 2116 | imap->im_agctl[agno].inofree = |
2119 | le32_to_cpu(iagp->inofreefwd); | 2117 | le32_to_cpu(iagp->inofreefwd); |
2120 | } | 2118 | } |
2121 | iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1); | 2119 | iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1); |
2122 | } | 2120 | } |
2123 | 2121 | ||
2124 | /* update the free inode count at the iag, ag, inode | 2122 | /* update the free inode count at the iag, ag, inode |
2125 | * map levels. | 2123 | * map levels. |
2126 | */ | 2124 | */ |
2127 | iagp->nfreeinos = cpu_to_le32(le32_to_cpu(iagp->nfreeinos) - 1); | 2125 | le32_add_cpu(&iagp->nfreeinos, -1); |
2128 | imap->im_agctl[agno].numfree -= 1; | 2126 | imap->im_agctl[agno].numfree -= 1; |
2129 | atomic_dec(&imap->im_numfree); | 2127 | atomic_dec(&imap->im_numfree); |
2130 | 2128 | ||
2131 | return (0); | 2129 | return (0); |
2132 | } | 2130 | } |
2133 | 2131 | ||
2134 | 2132 | ||
2135 | /* | 2133 | /* |
2136 | * NAME: diNewExt(imap,iagp,extno) | 2134 | * NAME: diNewExt(imap,iagp,extno) |
2137 | * | 2135 | * |
2138 | * FUNCTION: initialize a new extent of inodes for an iag, allocating | 2136 | * FUNCTION: initialize a new extent of inodes for an iag, allocating |
2139 | * the first inode of the extent for use for the current | 2137 | * the first inode of the extent for use for the current |
2140 | * allocation request. | 2138 | * allocation request. |
2141 | * | 2139 | * |
2142 | * disk resources are allocated for the new extent of inodes | 2140 | * disk resources are allocated for the new extent of inodes |
2143 | * and the inodes themselves are initialized to reflect their | 2141 | * and the inodes themselves are initialized to reflect their |
2144 | * existence within the extent (i.e. their inode numbers and | 2142 | * existence within the extent (i.e. their inode numbers and |
2145 | * inode extent addresses are set) and their initial state | 2143 | * inode extent addresses are set) and their initial state |
2146 | * (mode and link count are set to zero). | 2144 | * (mode and link count are set to zero). |
2147 | * | 2145 | * |
2148 | * if the iag is new, it is not yet on an ag extent free list | 2146 | * if the iag is new, it is not yet on an ag extent free list |
2149 | * but will now be placed on this list. | 2147 | * but will now be placed on this list. |
2150 | * | 2148 | * |
2151 | * if the allocation of the new extent causes the iag to | 2149 | * if the allocation of the new extent causes the iag to |
2152 | * have no free extent, the iag will be removed from the | 2150 | * have no free extent, the iag will be removed from the |
2153 | * ag extent free list. | 2151 | * ag extent free list. |
2154 | * | 2152 | * |
2155 | * if the iag has no free backed inodes, it will be placed | 2153 | * if the iag has no free backed inodes, it will be placed |
2156 | * on the ag free inode list, since the addition of the new | 2154 | * on the ag free inode list, since the addition of the new |
2157 | * extent will now cause it to have free inodes. | 2155 | * extent will now cause it to have free inodes. |
2158 | * | 2156 | * |
2159 | * a careful update approach is used to provide consistency | 2157 | * a careful update approach is used to provide consistency |
2160 | * (i.e. list consistency) in the face of updates to multiple | 2158 | * (i.e. list consistency) in the face of updates to multiple |
2161 | * buffers. under this approach, all required buffers are | 2159 | * buffers. under this approach, all required buffers are |
2162 | * obtained before making any updates and are held until all | 2160 | * obtained before making any updates and are held until all |
2163 | * updates are complete. | 2161 | * updates are complete. |
2164 | * | 2162 | * |
2165 | * PRE CONDITION: Already have buffer lock on iagp. Already have AG lock on | 2163 | * PRE CONDITION: Already have buffer lock on iagp. Already have AG lock on |
2166 | * this AG. Must have read lock on imap inode. | 2164 | * this AG. Must have read lock on imap inode. |
2167 | * | 2165 | * |
2168 | * PARAMETERS: | 2166 | * PARAMETERS: |
2169 | * imap - pointer to inode map control structure. | 2167 | * imap - pointer to inode map control structure. |
2170 | * iagp - pointer to iag. | 2168 | * iagp - pointer to iag. |
2171 | * extno - extent number. | 2169 | * extno - extent number. |
2172 | * | 2170 | * |
2173 | * RETURN VALUES: | 2171 | * RETURN VALUES: |
2174 | * 0 - success. | 2172 | * 0 - success. |
2175 | * -ENOSPC - insufficient disk resources. | 2173 | * -ENOSPC - insufficient disk resources. |
2176 | * -EIO - i/o error. | 2174 | * -EIO - i/o error. |
2177 | */ | 2175 | */ |
2178 | static int diNewExt(struct inomap * imap, struct iag * iagp, int extno) | 2176 | static int diNewExt(struct inomap * imap, struct iag * iagp, int extno) |
2179 | { | 2177 | { |
2180 | int agno, iagno, fwd, back, freei = 0, sword, rc; | 2178 | int agno, iagno, fwd, back, freei = 0, sword, rc; |
2181 | struct iag *aiagp = NULL, *biagp = NULL, *ciagp = NULL; | 2179 | struct iag *aiagp = NULL, *biagp = NULL, *ciagp = NULL; |
2182 | struct metapage *amp, *bmp, *cmp, *dmp; | 2180 | struct metapage *amp, *bmp, *cmp, *dmp; |
2183 | struct inode *ipimap; | 2181 | struct inode *ipimap; |
2184 | s64 blkno, hint; | 2182 | s64 blkno, hint; |
2185 | int i, j; | 2183 | int i, j; |
2186 | u32 mask; | 2184 | u32 mask; |
2187 | ino_t ino; | 2185 | ino_t ino; |
2188 | struct dinode *dp; | 2186 | struct dinode *dp; |
2189 | struct jfs_sb_info *sbi; | 2187 | struct jfs_sb_info *sbi; |
2190 | 2188 | ||
2191 | /* better have free extents. | 2189 | /* better have free extents. |
2192 | */ | 2190 | */ |
2193 | if (!iagp->nfreeexts) { | 2191 | if (!iagp->nfreeexts) { |
2194 | jfs_error(imap->im_ipimap->i_sb, "diNewExt: no free extents"); | 2192 | jfs_error(imap->im_ipimap->i_sb, "diNewExt: no free extents"); |
2195 | return -EIO; | 2193 | return -EIO; |
2196 | } | 2194 | } |
2197 | 2195 | ||
2198 | /* get the inode map inode. | 2196 | /* get the inode map inode. |
2199 | */ | 2197 | */ |
2200 | ipimap = imap->im_ipimap; | 2198 | ipimap = imap->im_ipimap; |
2201 | sbi = JFS_SBI(ipimap->i_sb); | 2199 | sbi = JFS_SBI(ipimap->i_sb); |
2202 | 2200 | ||
2203 | amp = bmp = cmp = NULL; | 2201 | amp = bmp = cmp = NULL; |
2204 | 2202 | ||
2205 | /* get the ag and iag numbers for this iag. | 2203 | /* get the ag and iag numbers for this iag. |
2206 | */ | 2204 | */ |
2207 | agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi); | 2205 | agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi); |
2208 | iagno = le32_to_cpu(iagp->iagnum); | 2206 | iagno = le32_to_cpu(iagp->iagnum); |
2209 | 2207 | ||
2210 | /* check if this is the last free extent within the | 2208 | /* check if this is the last free extent within the |
2211 | * iag. if so, the iag must be removed from the ag | 2209 | * iag. if so, the iag must be removed from the ag |
2212 | * free extent list, so get the iags preceeding and | 2210 | * free extent list, so get the iags preceeding and |
2213 | * following the iag on this list. | 2211 | * following the iag on this list. |
2214 | */ | 2212 | */ |
2215 | if (iagp->nfreeexts == cpu_to_le32(1)) { | 2213 | if (iagp->nfreeexts == cpu_to_le32(1)) { |
2216 | if ((fwd = le32_to_cpu(iagp->extfreefwd)) >= 0) { | 2214 | if ((fwd = le32_to_cpu(iagp->extfreefwd)) >= 0) { |
2217 | if ((rc = diIAGRead(imap, fwd, &))) | 2215 | if ((rc = diIAGRead(imap, fwd, &))) |
2218 | return (rc); | 2216 | return (rc); |
2219 | aiagp = (struct iag *) amp->data; | 2217 | aiagp = (struct iag *) amp->data; |
2220 | } | 2218 | } |
2221 | 2219 | ||
2222 | if ((back = le32_to_cpu(iagp->extfreeback)) >= 0) { | 2220 | if ((back = le32_to_cpu(iagp->extfreeback)) >= 0) { |
2223 | if ((rc = diIAGRead(imap, back, &bmp))) | 2221 | if ((rc = diIAGRead(imap, back, &bmp))) |
2224 | goto error_out; | 2222 | goto error_out; |
2225 | biagp = (struct iag *) bmp->data; | 2223 | biagp = (struct iag *) bmp->data; |
2226 | } | 2224 | } |
2227 | } else { | 2225 | } else { |
2228 | /* the iag has free extents. if all extents are free | 2226 | /* the iag has free extents. if all extents are free |
2229 | * (as is the case for a newly allocated iag), the iag | 2227 | * (as is the case for a newly allocated iag), the iag |
2230 | * must be added to the ag free extent list, so get | 2228 | * must be added to the ag free extent list, so get |
2231 | * the iag at the head of the list in preparation for | 2229 | * the iag at the head of the list in preparation for |
2232 | * adding this iag to this list. | 2230 | * adding this iag to this list. |
2233 | */ | 2231 | */ |
2234 | fwd = back = -1; | 2232 | fwd = back = -1; |
2235 | if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { | 2233 | if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { |
2236 | if ((fwd = imap->im_agctl[agno].extfree) >= 0) { | 2234 | if ((fwd = imap->im_agctl[agno].extfree) >= 0) { |
2237 | if ((rc = diIAGRead(imap, fwd, &))) | 2235 | if ((rc = diIAGRead(imap, fwd, &))) |
2238 | goto error_out; | 2236 | goto error_out; |
2239 | aiagp = (struct iag *) amp->data; | 2237 | aiagp = (struct iag *) amp->data; |
2240 | } | 2238 | } |
2241 | } | 2239 | } |
2242 | } | 2240 | } |
2243 | 2241 | ||
2244 | /* check if the iag has no free inodes. if so, the iag | 2242 | /* check if the iag has no free inodes. if so, the iag |
2245 | * will have to be added to the ag free inode list, so get | 2243 | * will have to be added to the ag free inode list, so get |
2246 | * the iag at the head of the list in preparation for | 2244 | * the iag at the head of the list in preparation for |
2247 | * adding this iag to this list. in doing this, we must | 2245 | * adding this iag to this list. in doing this, we must |
2248 | * check if we already have the iag at the head of | 2246 | * check if we already have the iag at the head of |
2249 | * the list in hand. | 2247 | * the list in hand. |
2250 | */ | 2248 | */ |
2251 | if (iagp->nfreeinos == 0) { | 2249 | if (iagp->nfreeinos == 0) { |
2252 | freei = imap->im_agctl[agno].inofree; | 2250 | freei = imap->im_agctl[agno].inofree; |
2253 | 2251 | ||
2254 | if (freei >= 0) { | 2252 | if (freei >= 0) { |
2255 | if (freei == fwd) { | 2253 | if (freei == fwd) { |
2256 | ciagp = aiagp; | 2254 | ciagp = aiagp; |
2257 | } else if (freei == back) { | 2255 | } else if (freei == back) { |
2258 | ciagp = biagp; | 2256 | ciagp = biagp; |
2259 | } else { | 2257 | } else { |
2260 | if ((rc = diIAGRead(imap, freei, &cmp))) | 2258 | if ((rc = diIAGRead(imap, freei, &cmp))) |
2261 | goto error_out; | 2259 | goto error_out; |
2262 | ciagp = (struct iag *) cmp->data; | 2260 | ciagp = (struct iag *) cmp->data; |
2263 | } | 2261 | } |
2264 | if (ciagp == NULL) { | 2262 | if (ciagp == NULL) { |
2265 | jfs_error(imap->im_ipimap->i_sb, | 2263 | jfs_error(imap->im_ipimap->i_sb, |
2266 | "diNewExt: ciagp == NULL"); | 2264 | "diNewExt: ciagp == NULL"); |
2267 | rc = -EIO; | 2265 | rc = -EIO; |
2268 | goto error_out; | 2266 | goto error_out; |
2269 | } | 2267 | } |
2270 | } | 2268 | } |
2271 | } | 2269 | } |
2272 | 2270 | ||
2273 | /* allocate disk space for the inode extent. | 2271 | /* allocate disk space for the inode extent. |
2274 | */ | 2272 | */ |
2275 | if ((extno == 0) || (addressPXD(&iagp->inoext[extno - 1]) == 0)) | 2273 | if ((extno == 0) || (addressPXD(&iagp->inoext[extno - 1]) == 0)) |
2276 | hint = ((s64) agno << sbi->bmap->db_agl2size) - 1; | 2274 | hint = ((s64) agno << sbi->bmap->db_agl2size) - 1; |
2277 | else | 2275 | else |
2278 | hint = addressPXD(&iagp->inoext[extno - 1]) + | 2276 | hint = addressPXD(&iagp->inoext[extno - 1]) + |
2279 | lengthPXD(&iagp->inoext[extno - 1]) - 1; | 2277 | lengthPXD(&iagp->inoext[extno - 1]) - 1; |
2280 | 2278 | ||
2281 | if ((rc = dbAlloc(ipimap, hint, (s64) imap->im_nbperiext, &blkno))) | 2279 | if ((rc = dbAlloc(ipimap, hint, (s64) imap->im_nbperiext, &blkno))) |
2282 | goto error_out; | 2280 | goto error_out; |
2283 | 2281 | ||
2284 | /* compute the inode number of the first inode within the | 2282 | /* compute the inode number of the first inode within the |
2285 | * extent. | 2283 | * extent. |
2286 | */ | 2284 | */ |
2287 | ino = (iagno << L2INOSPERIAG) + (extno << L2INOSPEREXT); | 2285 | ino = (iagno << L2INOSPERIAG) + (extno << L2INOSPEREXT); |
2288 | 2286 | ||
2289 | /* initialize the inodes within the newly allocated extent a | 2287 | /* initialize the inodes within the newly allocated extent a |
2290 | * page at a time. | 2288 | * page at a time. |
2291 | */ | 2289 | */ |
2292 | for (i = 0; i < imap->im_nbperiext; i += sbi->nbperpage) { | 2290 | for (i = 0; i < imap->im_nbperiext; i += sbi->nbperpage) { |
2293 | /* get a buffer for this page of disk inodes. | 2291 | /* get a buffer for this page of disk inodes. |
2294 | */ | 2292 | */ |
2295 | dmp = get_metapage(ipimap, blkno + i, PSIZE, 1); | 2293 | dmp = get_metapage(ipimap, blkno + i, PSIZE, 1); |
2296 | if (dmp == NULL) { | 2294 | if (dmp == NULL) { |
2297 | rc = -EIO; | 2295 | rc = -EIO; |
2298 | goto error_out; | 2296 | goto error_out; |
2299 | } | 2297 | } |
2300 | dp = (struct dinode *) dmp->data; | 2298 | dp = (struct dinode *) dmp->data; |
2301 | 2299 | ||
2302 | /* initialize the inode number, mode, link count and | 2300 | /* initialize the inode number, mode, link count and |
2303 | * inode extent address. | 2301 | * inode extent address. |
2304 | */ | 2302 | */ |
2305 | for (j = 0; j < INOSPERPAGE; j++, dp++, ino++) { | 2303 | for (j = 0; j < INOSPERPAGE; j++, dp++, ino++) { |
2306 | dp->di_inostamp = cpu_to_le32(sbi->inostamp); | 2304 | dp->di_inostamp = cpu_to_le32(sbi->inostamp); |
2307 | dp->di_number = cpu_to_le32(ino); | 2305 | dp->di_number = cpu_to_le32(ino); |
2308 | dp->di_fileset = cpu_to_le32(FILESYSTEM_I); | 2306 | dp->di_fileset = cpu_to_le32(FILESYSTEM_I); |
2309 | dp->di_mode = 0; | 2307 | dp->di_mode = 0; |
2310 | dp->di_nlink = 0; | 2308 | dp->di_nlink = 0; |
2311 | PXDaddress(&(dp->di_ixpxd), blkno); | 2309 | PXDaddress(&(dp->di_ixpxd), blkno); |
2312 | PXDlength(&(dp->di_ixpxd), imap->im_nbperiext); | 2310 | PXDlength(&(dp->di_ixpxd), imap->im_nbperiext); |
2313 | } | 2311 | } |
2314 | write_metapage(dmp); | 2312 | write_metapage(dmp); |
2315 | } | 2313 | } |
2316 | 2314 | ||
2317 | /* if this is the last free extent within the iag, remove the | 2315 | /* if this is the last free extent within the iag, remove the |
2318 | * iag from the ag free extent list. | 2316 | * iag from the ag free extent list. |
2319 | */ | 2317 | */ |
2320 | if (iagp->nfreeexts == cpu_to_le32(1)) { | 2318 | if (iagp->nfreeexts == cpu_to_le32(1)) { |
2321 | if (fwd >= 0) | 2319 | if (fwd >= 0) |
2322 | aiagp->extfreeback = iagp->extfreeback; | 2320 | aiagp->extfreeback = iagp->extfreeback; |
2323 | 2321 | ||
2324 | if (back >= 0) | 2322 | if (back >= 0) |
2325 | biagp->extfreefwd = iagp->extfreefwd; | 2323 | biagp->extfreefwd = iagp->extfreefwd; |
2326 | else | 2324 | else |
2327 | imap->im_agctl[agno].extfree = | 2325 | imap->im_agctl[agno].extfree = |
2328 | le32_to_cpu(iagp->extfreefwd); | 2326 | le32_to_cpu(iagp->extfreefwd); |
2329 | 2327 | ||
2330 | iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1); | 2328 | iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1); |
2331 | } else { | 2329 | } else { |
2332 | /* if the iag has all free extents (newly allocated iag), | 2330 | /* if the iag has all free extents (newly allocated iag), |
2333 | * add the iag to the ag free extent list. | 2331 | * add the iag to the ag free extent list. |
2334 | */ | 2332 | */ |
2335 | if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { | 2333 | if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { |
2336 | if (fwd >= 0) | 2334 | if (fwd >= 0) |
2337 | aiagp->extfreeback = cpu_to_le32(iagno); | 2335 | aiagp->extfreeback = cpu_to_le32(iagno); |
2338 | 2336 | ||
2339 | iagp->extfreefwd = cpu_to_le32(fwd); | 2337 | iagp->extfreefwd = cpu_to_le32(fwd); |
2340 | iagp->extfreeback = cpu_to_le32(-1); | 2338 | iagp->extfreeback = cpu_to_le32(-1); |
2341 | imap->im_agctl[agno].extfree = iagno; | 2339 | imap->im_agctl[agno].extfree = iagno; |
2342 | } | 2340 | } |
2343 | } | 2341 | } |
2344 | 2342 | ||
2345 | /* if the iag has no free inodes, add the iag to the | 2343 | /* if the iag has no free inodes, add the iag to the |
2346 | * ag free inode list. | 2344 | * ag free inode list. |
2347 | */ | 2345 | */ |
2348 | if (iagp->nfreeinos == 0) { | 2346 | if (iagp->nfreeinos == 0) { |
2349 | if (freei >= 0) | 2347 | if (freei >= 0) |
2350 | ciagp->inofreeback = cpu_to_le32(iagno); | 2348 | ciagp->inofreeback = cpu_to_le32(iagno); |
2351 | 2349 | ||
2352 | iagp->inofreefwd = | 2350 | iagp->inofreefwd = |
2353 | cpu_to_le32(imap->im_agctl[agno].inofree); | 2351 | cpu_to_le32(imap->im_agctl[agno].inofree); |
2354 | iagp->inofreeback = cpu_to_le32(-1); | 2352 | iagp->inofreeback = cpu_to_le32(-1); |
2355 | imap->im_agctl[agno].inofree = iagno; | 2353 | imap->im_agctl[agno].inofree = iagno; |
2356 | } | 2354 | } |
2357 | 2355 | ||
2358 | /* initialize the extent descriptor of the extent. */ | 2356 | /* initialize the extent descriptor of the extent. */ |
2359 | PXDlength(&iagp->inoext[extno], imap->im_nbperiext); | 2357 | PXDlength(&iagp->inoext[extno], imap->im_nbperiext); |
2360 | PXDaddress(&iagp->inoext[extno], blkno); | 2358 | PXDaddress(&iagp->inoext[extno], blkno); |
2361 | 2359 | ||
2362 | /* initialize the working and persistent map of the extent. | 2360 | /* initialize the working and persistent map of the extent. |
2363 | * the working map will be initialized such that | 2361 | * the working map will be initialized such that |
2364 | * it indicates the first inode of the extent is allocated. | 2362 | * it indicates the first inode of the extent is allocated. |
2365 | */ | 2363 | */ |
2366 | iagp->wmap[extno] = cpu_to_le32(HIGHORDER); | 2364 | iagp->wmap[extno] = cpu_to_le32(HIGHORDER); |
2367 | iagp->pmap[extno] = 0; | 2365 | iagp->pmap[extno] = 0; |
2368 | 2366 | ||
2369 | /* update the free inode and free extent summary maps | 2367 | /* update the free inode and free extent summary maps |
2370 | * for the extent to indicate the extent has free inodes | 2368 | * for the extent to indicate the extent has free inodes |
2371 | * and no longer represents a free extent. | 2369 | * and no longer represents a free extent. |
2372 | */ | 2370 | */ |
2373 | sword = extno >> L2EXTSPERSUM; | 2371 | sword = extno >> L2EXTSPERSUM; |
2374 | mask = HIGHORDER >> (extno & (EXTSPERSUM - 1)); | 2372 | mask = HIGHORDER >> (extno & (EXTSPERSUM - 1)); |
2375 | iagp->extsmap[sword] |= cpu_to_le32(mask); | 2373 | iagp->extsmap[sword] |= cpu_to_le32(mask); |
2376 | iagp->inosmap[sword] &= cpu_to_le32(~mask); | 2374 | iagp->inosmap[sword] &= cpu_to_le32(~mask); |
2377 | 2375 | ||
2378 | /* update the free inode and free extent counts for the | 2376 | /* update the free inode and free extent counts for the |
2379 | * iag. | 2377 | * iag. |
2380 | */ | 2378 | */ |
2381 | iagp->nfreeinos = cpu_to_le32(le32_to_cpu(iagp->nfreeinos) + | 2379 | le32_add_cpu(&iagp->nfreeinos, (INOSPEREXT - 1)); |
2382 | (INOSPEREXT - 1)); | 2380 | le32_add_cpu(&iagp->nfreeexts, -1); |
2383 | iagp->nfreeexts = cpu_to_le32(le32_to_cpu(iagp->nfreeexts) - 1); | ||
2384 | 2381 | ||
2385 | /* update the free and backed inode counts for the ag. | 2382 | /* update the free and backed inode counts for the ag. |
2386 | */ | 2383 | */ |
2387 | imap->im_agctl[agno].numfree += (INOSPEREXT - 1); | 2384 | imap->im_agctl[agno].numfree += (INOSPEREXT - 1); |
2388 | imap->im_agctl[agno].numinos += INOSPEREXT; | 2385 | imap->im_agctl[agno].numinos += INOSPEREXT; |
2389 | 2386 | ||
2390 | /* update the free and backed inode counts for the inode map. | 2387 | /* update the free and backed inode counts for the inode map. |
2391 | */ | 2388 | */ |
2392 | atomic_add(INOSPEREXT - 1, &imap->im_numfree); | 2389 | atomic_add(INOSPEREXT - 1, &imap->im_numfree); |
2393 | atomic_add(INOSPEREXT, &imap->im_numinos); | 2390 | atomic_add(INOSPEREXT, &imap->im_numinos); |
2394 | 2391 | ||
2395 | /* write the iags. | 2392 | /* write the iags. |
2396 | */ | 2393 | */ |
2397 | if (amp) | 2394 | if (amp) |
2398 | write_metapage(amp); | 2395 | write_metapage(amp); |
2399 | if (bmp) | 2396 | if (bmp) |
2400 | write_metapage(bmp); | 2397 | write_metapage(bmp); |
2401 | if (cmp) | 2398 | if (cmp) |
2402 | write_metapage(cmp); | 2399 | write_metapage(cmp); |
2403 | 2400 | ||
2404 | return (0); | 2401 | return (0); |
2405 | 2402 | ||
2406 | error_out: | 2403 | error_out: |
2407 | 2404 | ||
2408 | /* release the iags. | 2405 | /* release the iags. |
2409 | */ | 2406 | */ |
2410 | if (amp) | 2407 | if (amp) |
2411 | release_metapage(amp); | 2408 | release_metapage(amp); |
2412 | if (bmp) | 2409 | if (bmp) |
2413 | release_metapage(bmp); | 2410 | release_metapage(bmp); |
2414 | if (cmp) | 2411 | if (cmp) |
2415 | release_metapage(cmp); | 2412 | release_metapage(cmp); |
2416 | 2413 | ||
2417 | return (rc); | 2414 | return (rc); |
2418 | } | 2415 | } |
2419 | 2416 | ||
2420 | 2417 | ||
2421 | /* | 2418 | /* |
2422 | * NAME: diNewIAG(imap,iagnop,agno) | 2419 | * NAME: diNewIAG(imap,iagnop,agno) |
2423 | * | 2420 | * |
2424 | * FUNCTION: allocate a new iag for an allocation group. | 2421 | * FUNCTION: allocate a new iag for an allocation group. |
2425 | * | 2422 | * |
2426 | * first tries to allocate the iag from the inode map | 2423 | * first tries to allocate the iag from the inode map |
2427 | * iagfree list: | 2424 | * iagfree list: |
2428 | * if the list has free iags, the head of the list is removed | 2425 | * if the list has free iags, the head of the list is removed |
2429 | * and returned to satisfy the request. | 2426 | * and returned to satisfy the request. |
2430 | * if the inode map's iag free list is empty, the inode map | 2427 | * if the inode map's iag free list is empty, the inode map |
2431 | * is extended to hold a new iag. this new iag is initialized | 2428 | * is extended to hold a new iag. this new iag is initialized |
2432 | * and returned to satisfy the request. | 2429 | * and returned to satisfy the request. |
2433 | * | 2430 | * |
2434 | * PARAMETERS: | 2431 | * PARAMETERS: |
2435 | * imap - pointer to inode map control structure. | 2432 | * imap - pointer to inode map control structure. |
2436 | * iagnop - pointer to an iag number set with the number of the | 2433 | * iagnop - pointer to an iag number set with the number of the |
2437 | * newly allocated iag upon successful return. | 2434 | * newly allocated iag upon successful return. |
2438 | * agno - allocation group number. | 2435 | * agno - allocation group number. |
2439 | * bpp - Buffer pointer to be filled in with new IAG's buffer | 2436 | * bpp - Buffer pointer to be filled in with new IAG's buffer |
2440 | * | 2437 | * |
2441 | * RETURN VALUES: | 2438 | * RETURN VALUES: |
2442 | * 0 - success. | 2439 | * 0 - success. |
2443 | * -ENOSPC - insufficient disk resources. | 2440 | * -ENOSPC - insufficient disk resources. |
2444 | * -EIO - i/o error. | 2441 | * -EIO - i/o error. |
2445 | * | 2442 | * |
2446 | * serialization: | 2443 | * serialization: |
2447 | * AG lock held on entry/exit; | 2444 | * AG lock held on entry/exit; |
2448 | * write lock on the map is held inside; | 2445 | * write lock on the map is held inside; |
2449 | * read lock on the map is held on successful completion; | 2446 | * read lock on the map is held on successful completion; |
2450 | * | 2447 | * |
2451 | * note: new iag transaction: | 2448 | * note: new iag transaction: |
2452 | * . synchronously write iag; | 2449 | * . synchronously write iag; |
2453 | * . write log of xtree and inode of imap; | 2450 | * . write log of xtree and inode of imap; |
2454 | * . commit; | 2451 | * . commit; |
2455 | * . synchronous write of xtree (right to left, bottom to top); | 2452 | * . synchronous write of xtree (right to left, bottom to top); |
2456 | * . at start of logredo(): init in-memory imap with one additional iag page; | 2453 | * . at start of logredo(): init in-memory imap with one additional iag page; |
2457 | * . at end of logredo(): re-read imap inode to determine | 2454 | * . at end of logredo(): re-read imap inode to determine |
2458 | * new imap size; | 2455 | * new imap size; |
2459 | */ | 2456 | */ |
2460 | static int | 2457 | static int |
2461 | diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) | 2458 | diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) |
2462 | { | 2459 | { |
2463 | int rc; | 2460 | int rc; |
2464 | int iagno, i, xlen; | 2461 | int iagno, i, xlen; |
2465 | struct inode *ipimap; | 2462 | struct inode *ipimap; |
2466 | struct super_block *sb; | 2463 | struct super_block *sb; |
2467 | struct jfs_sb_info *sbi; | 2464 | struct jfs_sb_info *sbi; |
2468 | struct metapage *mp; | 2465 | struct metapage *mp; |
2469 | struct iag *iagp; | 2466 | struct iag *iagp; |
2470 | s64 xaddr = 0; | 2467 | s64 xaddr = 0; |
2471 | s64 blkno; | 2468 | s64 blkno; |
2472 | tid_t tid; | 2469 | tid_t tid; |
2473 | struct inode *iplist[1]; | 2470 | struct inode *iplist[1]; |
2474 | 2471 | ||
2475 | /* pick up pointers to the inode map and mount inodes */ | 2472 | /* pick up pointers to the inode map and mount inodes */ |
2476 | ipimap = imap->im_ipimap; | 2473 | ipimap = imap->im_ipimap; |
2477 | sb = ipimap->i_sb; | 2474 | sb = ipimap->i_sb; |
2478 | sbi = JFS_SBI(sb); | 2475 | sbi = JFS_SBI(sb); |
2479 | 2476 | ||
2480 | /* acquire the free iag lock */ | 2477 | /* acquire the free iag lock */ |
2481 | IAGFREE_LOCK(imap); | 2478 | IAGFREE_LOCK(imap); |
2482 | 2479 | ||
2483 | /* if there are any iags on the inode map free iag list, | 2480 | /* if there are any iags on the inode map free iag list, |
2484 | * allocate the iag from the head of the list. | 2481 | * allocate the iag from the head of the list. |
2485 | */ | 2482 | */ |
2486 | if (imap->im_freeiag >= 0) { | 2483 | if (imap->im_freeiag >= 0) { |
2487 | /* pick up the iag number at the head of the list */ | 2484 | /* pick up the iag number at the head of the list */ |
2488 | iagno = imap->im_freeiag; | 2485 | iagno = imap->im_freeiag; |
2489 | 2486 | ||
2490 | /* determine the logical block number of the iag */ | 2487 | /* determine the logical block number of the iag */ |
2491 | blkno = IAGTOLBLK(iagno, sbi->l2nbperpage); | 2488 | blkno = IAGTOLBLK(iagno, sbi->l2nbperpage); |
2492 | } else { | 2489 | } else { |
2493 | /* no free iags. the inode map will have to be extended | 2490 | /* no free iags. the inode map will have to be extended |
2494 | * to include a new iag. | 2491 | * to include a new iag. |
2495 | */ | 2492 | */ |
2496 | 2493 | ||
2497 | /* acquire inode map lock */ | 2494 | /* acquire inode map lock */ |
2498 | IWRITE_LOCK(ipimap, RDWRLOCK_IMAP); | 2495 | IWRITE_LOCK(ipimap, RDWRLOCK_IMAP); |
2499 | 2496 | ||
2500 | if (ipimap->i_size >> L2PSIZE != imap->im_nextiag + 1) { | 2497 | if (ipimap->i_size >> L2PSIZE != imap->im_nextiag + 1) { |
2501 | IWRITE_UNLOCK(ipimap); | 2498 | IWRITE_UNLOCK(ipimap); |
2502 | IAGFREE_UNLOCK(imap); | 2499 | IAGFREE_UNLOCK(imap); |
2503 | jfs_error(imap->im_ipimap->i_sb, | 2500 | jfs_error(imap->im_ipimap->i_sb, |
2504 | "diNewIAG: ipimap->i_size is wrong"); | 2501 | "diNewIAG: ipimap->i_size is wrong"); |
2505 | return -EIO; | 2502 | return -EIO; |
2506 | } | 2503 | } |
2507 | 2504 | ||
2508 | 2505 | ||
2509 | /* get the next available iag number */ | 2506 | /* get the next available iag number */ |
2510 | iagno = imap->im_nextiag; | 2507 | iagno = imap->im_nextiag; |
2511 | 2508 | ||
2512 | /* make sure that we have not exceeded the maximum inode | 2509 | /* make sure that we have not exceeded the maximum inode |
2513 | * number limit. | 2510 | * number limit. |
2514 | */ | 2511 | */ |
2515 | if (iagno > (MAXIAGS - 1)) { | 2512 | if (iagno > (MAXIAGS - 1)) { |
2516 | /* release the inode map lock */ | 2513 | /* release the inode map lock */ |
2517 | IWRITE_UNLOCK(ipimap); | 2514 | IWRITE_UNLOCK(ipimap); |
2518 | 2515 | ||
2519 | rc = -ENOSPC; | 2516 | rc = -ENOSPC; |
2520 | goto out; | 2517 | goto out; |
2521 | } | 2518 | } |
2522 | 2519 | ||
2523 | /* | 2520 | /* |
2524 | * synchronously append new iag page. | 2521 | * synchronously append new iag page. |
2525 | */ | 2522 | */ |
2526 | /* determine the logical address of iag page to append */ | 2523 | /* determine the logical address of iag page to append */ |
2527 | blkno = IAGTOLBLK(iagno, sbi->l2nbperpage); | 2524 | blkno = IAGTOLBLK(iagno, sbi->l2nbperpage); |
2528 | 2525 | ||
2529 | /* Allocate extent for new iag page */ | 2526 | /* Allocate extent for new iag page */ |
2530 | xlen = sbi->nbperpage; | 2527 | xlen = sbi->nbperpage; |
2531 | if ((rc = dbAlloc(ipimap, 0, (s64) xlen, &xaddr))) { | 2528 | if ((rc = dbAlloc(ipimap, 0, (s64) xlen, &xaddr))) { |
2532 | /* release the inode map lock */ | 2529 | /* release the inode map lock */ |
2533 | IWRITE_UNLOCK(ipimap); | 2530 | IWRITE_UNLOCK(ipimap); |
2534 | 2531 | ||
2535 | goto out; | 2532 | goto out; |
2536 | } | 2533 | } |
2537 | 2534 | ||
2538 | /* | 2535 | /* |
2539 | * start transaction of update of the inode map | 2536 | * start transaction of update of the inode map |
2540 | * addressing structure pointing to the new iag page; | 2537 | * addressing structure pointing to the new iag page; |
2541 | */ | 2538 | */ |
2542 | tid = txBegin(sb, COMMIT_FORCE); | 2539 | tid = txBegin(sb, COMMIT_FORCE); |
2543 | mutex_lock(&JFS_IP(ipimap)->commit_mutex); | 2540 | mutex_lock(&JFS_IP(ipimap)->commit_mutex); |
2544 | 2541 | ||
2545 | /* update the inode map addressing structure to point to it */ | 2542 | /* update the inode map addressing structure to point to it */ |
2546 | if ((rc = | 2543 | if ((rc = |
2547 | xtInsert(tid, ipimap, 0, blkno, xlen, &xaddr, 0))) { | 2544 | xtInsert(tid, ipimap, 0, blkno, xlen, &xaddr, 0))) { |
2548 | txEnd(tid); | 2545 | txEnd(tid); |
2549 | mutex_unlock(&JFS_IP(ipimap)->commit_mutex); | 2546 | mutex_unlock(&JFS_IP(ipimap)->commit_mutex); |
2550 | /* Free the blocks allocated for the iag since it was | 2547 | /* Free the blocks allocated for the iag since it was |
2551 | * not successfully added to the inode map | 2548 | * not successfully added to the inode map |
2552 | */ | 2549 | */ |
2553 | dbFree(ipimap, xaddr, (s64) xlen); | 2550 | dbFree(ipimap, xaddr, (s64) xlen); |
2554 | 2551 | ||
2555 | /* release the inode map lock */ | 2552 | /* release the inode map lock */ |
2556 | IWRITE_UNLOCK(ipimap); | 2553 | IWRITE_UNLOCK(ipimap); |
2557 | 2554 | ||
2558 | goto out; | 2555 | goto out; |
2559 | } | 2556 | } |
2560 | 2557 | ||
2561 | /* update the inode map's inode to reflect the extension */ | 2558 | /* update the inode map's inode to reflect the extension */ |
2562 | ipimap->i_size += PSIZE; | 2559 | ipimap->i_size += PSIZE; |
2563 | inode_add_bytes(ipimap, PSIZE); | 2560 | inode_add_bytes(ipimap, PSIZE); |
2564 | 2561 | ||
2565 | /* assign a buffer for the page */ | 2562 | /* assign a buffer for the page */ |
2566 | mp = get_metapage(ipimap, blkno, PSIZE, 0); | 2563 | mp = get_metapage(ipimap, blkno, PSIZE, 0); |
2567 | if (!mp) { | 2564 | if (!mp) { |
2568 | /* | 2565 | /* |
2569 | * This is very unlikely since we just created the | 2566 | * This is very unlikely since we just created the |
2570 | * extent, but let's try to handle it correctly | 2567 | * extent, but let's try to handle it correctly |
2571 | */ | 2568 | */ |
2572 | xtTruncate(tid, ipimap, ipimap->i_size - PSIZE, | 2569 | xtTruncate(tid, ipimap, ipimap->i_size - PSIZE, |
2573 | COMMIT_PWMAP); | 2570 | COMMIT_PWMAP); |
2574 | 2571 | ||
2575 | txAbort(tid, 0); | 2572 | txAbort(tid, 0); |
2576 | txEnd(tid); | 2573 | txEnd(tid); |
2577 | 2574 | ||
2578 | /* release the inode map lock */ | 2575 | /* release the inode map lock */ |
2579 | IWRITE_UNLOCK(ipimap); | 2576 | IWRITE_UNLOCK(ipimap); |
2580 | 2577 | ||
2581 | rc = -EIO; | 2578 | rc = -EIO; |
2582 | goto out; | 2579 | goto out; |
2583 | } | 2580 | } |
2584 | iagp = (struct iag *) mp->data; | 2581 | iagp = (struct iag *) mp->data; |
2585 | 2582 | ||
2586 | /* init the iag */ | 2583 | /* init the iag */ |
2587 | memset(iagp, 0, sizeof(struct iag)); | 2584 | memset(iagp, 0, sizeof(struct iag)); |
2588 | iagp->iagnum = cpu_to_le32(iagno); | 2585 | iagp->iagnum = cpu_to_le32(iagno); |
2589 | iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1); | 2586 | iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1); |
2590 | iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1); | 2587 | iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1); |
2591 | iagp->iagfree = cpu_to_le32(-1); | 2588 | iagp->iagfree = cpu_to_le32(-1); |
2592 | iagp->nfreeinos = 0; | 2589 | iagp->nfreeinos = 0; |
2593 | iagp->nfreeexts = cpu_to_le32(EXTSPERIAG); | 2590 | iagp->nfreeexts = cpu_to_le32(EXTSPERIAG); |
2594 | 2591 | ||
2595 | /* initialize the free inode summary map (free extent | 2592 | /* initialize the free inode summary map (free extent |
2596 | * summary map initialization handled by bzero). | 2593 | * summary map initialization handled by bzero). |
2597 | */ | 2594 | */ |
2598 | for (i = 0; i < SMAPSZ; i++) | 2595 | for (i = 0; i < SMAPSZ; i++) |
2599 | iagp->inosmap[i] = cpu_to_le32(ONES); | 2596 | iagp->inosmap[i] = cpu_to_le32(ONES); |
2600 | 2597 | ||
2601 | /* | 2598 | /* |
2602 | * Write and sync the metapage | 2599 | * Write and sync the metapage |
2603 | */ | 2600 | */ |
2604 | flush_metapage(mp); | 2601 | flush_metapage(mp); |
2605 | 2602 | ||
2606 | /* | 2603 | /* |
2607 | * txCommit(COMMIT_FORCE) will synchronously write address | 2604 | * txCommit(COMMIT_FORCE) will synchronously write address |
2608 | * index pages and inode after commit in careful update order | 2605 | * index pages and inode after commit in careful update order |
2609 | * of address index pages (right to left, bottom up); | 2606 | * of address index pages (right to left, bottom up); |
2610 | */ | 2607 | */ |
2611 | iplist[0] = ipimap; | 2608 | iplist[0] = ipimap; |
2612 | rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE); | 2609 | rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE); |
2613 | 2610 | ||
2614 | txEnd(tid); | 2611 | txEnd(tid); |
2615 | mutex_unlock(&JFS_IP(ipimap)->commit_mutex); | 2612 | mutex_unlock(&JFS_IP(ipimap)->commit_mutex); |
2616 | 2613 | ||
2617 | duplicateIXtree(sb, blkno, xlen, &xaddr); | 2614 | duplicateIXtree(sb, blkno, xlen, &xaddr); |
2618 | 2615 | ||
2619 | /* update the next available iag number */ | 2616 | /* update the next available iag number */ |
2620 | imap->im_nextiag += 1; | 2617 | imap->im_nextiag += 1; |
2621 | 2618 | ||
2622 | /* Add the iag to the iag free list so we don't lose the iag | 2619 | /* Add the iag to the iag free list so we don't lose the iag |
2623 | * if a failure happens now. | 2620 | * if a failure happens now. |
2624 | */ | 2621 | */ |
2625 | imap->im_freeiag = iagno; | 2622 | imap->im_freeiag = iagno; |
2626 | 2623 | ||
2627 | /* Until we have logredo working, we want the imap inode & | 2624 | /* Until we have logredo working, we want the imap inode & |
2628 | * control page to be up to date. | 2625 | * control page to be up to date. |
2629 | */ | 2626 | */ |
2630 | diSync(ipimap); | 2627 | diSync(ipimap); |
2631 | 2628 | ||
2632 | /* release the inode map lock */ | 2629 | /* release the inode map lock */ |
2633 | IWRITE_UNLOCK(ipimap); | 2630 | IWRITE_UNLOCK(ipimap); |
2634 | } | 2631 | } |
2635 | 2632 | ||
2636 | /* obtain read lock on map */ | 2633 | /* obtain read lock on map */ |
2637 | IREAD_LOCK(ipimap, RDWRLOCK_IMAP); | 2634 | IREAD_LOCK(ipimap, RDWRLOCK_IMAP); |
2638 | 2635 | ||
2639 | /* read the iag */ | 2636 | /* read the iag */ |
2640 | if ((rc = diIAGRead(imap, iagno, &mp))) { | 2637 | if ((rc = diIAGRead(imap, iagno, &mp))) { |
2641 | IREAD_UNLOCK(ipimap); | 2638 | IREAD_UNLOCK(ipimap); |
2642 | rc = -EIO; | 2639 | rc = -EIO; |
2643 | goto out; | 2640 | goto out; |
2644 | } | 2641 | } |
2645 | iagp = (struct iag *) mp->data; | 2642 | iagp = (struct iag *) mp->data; |
2646 | 2643 | ||
2647 | /* remove the iag from the iag free list */ | 2644 | /* remove the iag from the iag free list */ |
2648 | imap->im_freeiag = le32_to_cpu(iagp->iagfree); | 2645 | imap->im_freeiag = le32_to_cpu(iagp->iagfree); |
2649 | iagp->iagfree = cpu_to_le32(-1); | 2646 | iagp->iagfree = cpu_to_le32(-1); |
2650 | 2647 | ||
2651 | /* set the return iag number and buffer pointer */ | 2648 | /* set the return iag number and buffer pointer */ |
2652 | *iagnop = iagno; | 2649 | *iagnop = iagno; |
2653 | *mpp = mp; | 2650 | *mpp = mp; |
2654 | 2651 | ||
2655 | out: | 2652 | out: |
2656 | /* release the iag free lock */ | 2653 | /* release the iag free lock */ |
2657 | IAGFREE_UNLOCK(imap); | 2654 | IAGFREE_UNLOCK(imap); |
2658 | 2655 | ||
2659 | return (rc); | 2656 | return (rc); |
2660 | } | 2657 | } |
2661 | 2658 | ||
2662 | /* | 2659 | /* |
2663 | * NAME: diIAGRead() | 2660 | * NAME: diIAGRead() |
2664 | * | 2661 | * |
2665 | * FUNCTION: get the buffer for the specified iag within a fileset | 2662 | * FUNCTION: get the buffer for the specified iag within a fileset |
2666 | * or aggregate inode map. | 2663 | * or aggregate inode map. |
2667 | * | 2664 | * |
2668 | * PARAMETERS: | 2665 | * PARAMETERS: |
2669 | * imap - pointer to inode map control structure. | 2666 | * imap - pointer to inode map control structure. |
2670 | * iagno - iag number. | 2667 | * iagno - iag number. |
2671 | * bpp - point to buffer pointer to be filled in on successful | 2668 | * bpp - point to buffer pointer to be filled in on successful |
2672 | * exit. | 2669 | * exit. |
2673 | * | 2670 | * |
2674 | * SERIALIZATION: | 2671 | * SERIALIZATION: |
2675 | * must have read lock on imap inode | 2672 | * must have read lock on imap inode |
2676 | * (When called by diExtendFS, the filesystem is quiesced, therefore | 2673 | * (When called by diExtendFS, the filesystem is quiesced, therefore |
2677 | * the read lock is unnecessary.) | 2674 | * the read lock is unnecessary.) |
2678 | * | 2675 | * |
2679 | * RETURN VALUES: | 2676 | * RETURN VALUES: |
2680 | * 0 - success. | 2677 | * 0 - success. |
2681 | * -EIO - i/o error. | 2678 | * -EIO - i/o error. |
2682 | */ | 2679 | */ |
2683 | static int diIAGRead(struct inomap * imap, int iagno, struct metapage ** mpp) | 2680 | static int diIAGRead(struct inomap * imap, int iagno, struct metapage ** mpp) |
2684 | { | 2681 | { |
2685 | struct inode *ipimap = imap->im_ipimap; | 2682 | struct inode *ipimap = imap->im_ipimap; |
2686 | s64 blkno; | 2683 | s64 blkno; |
2687 | 2684 | ||
2688 | /* compute the logical block number of the iag. */ | 2685 | /* compute the logical block number of the iag. */ |
2689 | blkno = IAGTOLBLK(iagno, JFS_SBI(ipimap->i_sb)->l2nbperpage); | 2686 | blkno = IAGTOLBLK(iagno, JFS_SBI(ipimap->i_sb)->l2nbperpage); |
2690 | 2687 | ||
2691 | /* read the iag. */ | 2688 | /* read the iag. */ |
2692 | *mpp = read_metapage(ipimap, blkno, PSIZE, 0); | 2689 | *mpp = read_metapage(ipimap, blkno, PSIZE, 0); |
2693 | if (*mpp == NULL) { | 2690 | if (*mpp == NULL) { |
2694 | return -EIO; | 2691 | return -EIO; |
2695 | } | 2692 | } |
2696 | 2693 | ||
2697 | return (0); | 2694 | return (0); |
2698 | } | 2695 | } |
2699 | 2696 | ||
2700 | /* | 2697 | /* |
2701 | * NAME: diFindFree() | 2698 | * NAME: diFindFree() |
2702 | * | 2699 | * |
2703 | * FUNCTION: find the first free bit in a word starting at | 2700 | * FUNCTION: find the first free bit in a word starting at |
2704 | * the specified bit position. | 2701 | * the specified bit position. |
2705 | * | 2702 | * |
2706 | * PARAMETERS: | 2703 | * PARAMETERS: |
2707 | * word - word to be examined. | 2704 | * word - word to be examined. |
2708 | * start - starting bit position. | 2705 | * start - starting bit position. |
2709 | * | 2706 | * |
2710 | * RETURN VALUES: | 2707 | * RETURN VALUES: |
2711 | * bit position of first free bit in the word or 32 if | 2708 | * bit position of first free bit in the word or 32 if |
2712 | * no free bits were found. | 2709 | * no free bits were found. |
2713 | */ | 2710 | */ |
2714 | static int diFindFree(u32 word, int start) | 2711 | static int diFindFree(u32 word, int start) |
2715 | { | 2712 | { |
2716 | int bitno; | 2713 | int bitno; |
2717 | assert(start < 32); | 2714 | assert(start < 32); |
2718 | /* scan the word for the first free bit. */ | 2715 | /* scan the word for the first free bit. */ |
2719 | for (word <<= start, bitno = start; bitno < 32; | 2716 | for (word <<= start, bitno = start; bitno < 32; |
2720 | bitno++, word <<= 1) { | 2717 | bitno++, word <<= 1) { |
2721 | if ((word & HIGHORDER) == 0) | 2718 | if ((word & HIGHORDER) == 0) |
2722 | break; | 2719 | break; |
2723 | } | 2720 | } |
2724 | return (bitno); | 2721 | return (bitno); |
2725 | } | 2722 | } |
2726 | 2723 | ||
2727 | /* | 2724 | /* |
2728 | * NAME: diUpdatePMap() | 2725 | * NAME: diUpdatePMap() |
2729 | * | 2726 | * |
2730 | * FUNCTION: Update the persistent map in an IAG for the allocation or | 2727 | * FUNCTION: Update the persistent map in an IAG for the allocation or |
2731 | * freeing of the specified inode. | 2728 | * freeing of the specified inode. |
2732 | * | 2729 | * |
2733 | * PRE CONDITIONS: Working map has already been updated for allocate. | 2730 | * PRE CONDITIONS: Working map has already been updated for allocate. |
2734 | * | 2731 | * |
2735 | * PARAMETERS: | 2732 | * PARAMETERS: |
2736 | * ipimap - Incore inode map inode | 2733 | * ipimap - Incore inode map inode |
2737 | * inum - Number of inode to mark in permanent map | 2734 | * inum - Number of inode to mark in permanent map |
2738 | * is_free - If 'true' indicates inode should be marked freed, otherwise | 2735 | * is_free - If 'true' indicates inode should be marked freed, otherwise |
2739 | * indicates inode should be marked allocated. | 2736 | * indicates inode should be marked allocated. |
2740 | * | 2737 | * |
2741 | * RETURN VALUES: | 2738 | * RETURN VALUES: |
2742 | * 0 for success | 2739 | * 0 for success |
2743 | */ | 2740 | */ |
int
diUpdatePMap(struct inode *ipimap,
	     unsigned long inum, bool is_free, struct tblock * tblk)
{
	int rc;
	struct iag *iagp;
	struct metapage *mp;
	int iagno, ino, extno, bitno;
	struct inomap *imap;
	u32 mask;
	struct jfs_log *log;
	int lsn, difft, diffp;
	unsigned long flags;

	imap = JFS_IP(ipimap)->i_imap;
	/* get the iag number containing the inode */
	iagno = INOTOIAG(inum);
	/* make sure that the iag is contained within the map */
	if (iagno >= imap->im_nextiag) {
		jfs_error(ipimap->i_sb,
			  "diUpdatePMap: the iag is outside the map");
		return -EIO;
	}
	/* read the iag; the read lock only covers diIAGRead itself,
	 * the page is then held via the metapage reference.
	 */
	IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
	rc = diIAGRead(imap, iagno, &mp);
	IREAD_UNLOCK(ipimap);
	if (rc)
		return (rc);
	/* wait for any in-flight write so we modify a stable page */
	metapage_wait_for_io(mp);
	iagp = (struct iag *) mp->data;
	/* get the inode number and extent number of the inode within
	 * the iag and the inode number within the extent.
	 */
	ino = inum & (INOSPERIAG - 1);
	extno = ino >> L2INOSPEREXT;
	bitno = ino & (INOSPEREXT - 1);
	/* mask selects this inode's bit within the 32-bit extent word */
	mask = HIGHORDER >> bitno;
	/*
	 * mark the inode free in persistent map:
	 */
	if (is_free) {
		/* The inode should have been allocated both in working
		 * map and in persistent map;
		 * the inode will be freed from working map at the release
		 * of last reference release;
		 */
		/* NOTE(review): unlike the allocate branch below, these two
		 * sanity checks log via jfs_error() but fall through and
		 * clear the pmap bit anyway, without releasing mp or
		 * returning -EIO — asymmetric error handling; confirm this
		 * is intentional before changing it.
		 */
		if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) {
			jfs_error(ipimap->i_sb,
				  "diUpdatePMap: inode %ld not marked as "
				  "allocated in wmap!", inum);
		}
		if (!(le32_to_cpu(iagp->pmap[extno]) & mask)) {
			jfs_error(ipimap->i_sb,
				  "diUpdatePMap: inode %ld not marked as "
				  "allocated in pmap!", inum);
		}
		/* update the bitmap for the extent of the freed inode */
		iagp->pmap[extno] &= cpu_to_le32(~mask);
	}
	/*
	 * mark the inode allocated in persistent map:
	 */
	else {
		/* The inode should be already allocated in the working map
		 * and should be free in persistent map;
		 */
		if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) {
			release_metapage(mp);
			jfs_error(ipimap->i_sb,
				  "diUpdatePMap: the inode is not allocated in "
				  "the working map");
			return -EIO;
		}
		if ((le32_to_cpu(iagp->pmap[extno]) & mask) != 0) {
			release_metapage(mp);
			jfs_error(ipimap->i_sb,
				  "diUpdatePMap: the inode is not free in the "
				  "persistent map");
			return -EIO;
		}
		/* update the bitmap for the extent of the allocated inode */
		iagp->pmap[extno] |= cpu_to_le32(mask);
	}
	/*
	 * update iag lsn: tie the metapage into the log's sync list so
	 * it is written in the right order relative to the transaction.
	 */
	lsn = tblk->lsn;
	log = JFS_SBI(tblk->sb)->log;
	LOGSYNC_LOCK(log, flags);
	if (mp->lsn != 0) {
		/* page already on a logsync list: inherit older/smaller lsn */
		logdiff(difft, lsn, log);
		logdiff(diffp, mp->lsn, log);
		if (difft < diffp) {
			mp->lsn = lsn;
			/* move mp after tblock in logsync list */
			list_move(&mp->synclist, &tblk->synclist);
		}
		/* inherit younger/larger clsn */
		assert(mp->clsn);
		logdiff(difft, tblk->clsn, log);
		logdiff(diffp, mp->clsn, log);
		if (difft > diffp)
			mp->clsn = tblk->clsn;
	} else {
		/* first time on a logsync list: adopt the tblock's lsn/clsn */
		mp->log = log;
		mp->lsn = lsn;
		/* insert mp after tblock in logsync list */
		log->count++;
		list_add(&mp->synclist, &tblk->synclist);
		mp->clsn = tblk->clsn;
	}
	LOGSYNC_UNLOCK(log, flags);
	/* write_metapage() also drops our reference on mp */
	write_metapage(mp);
	return (0);
}
2861 | 2858 | ||
2862 | /* | 2859 | /* |
2863 | * diExtendFS() | 2860 | * diExtendFS() |
2864 | * | 2861 | * |
2865 | * function: update imap for extendfs(); | 2862 | * function: update imap for extendfs(); |
2866 | * | 2863 | * |
2867 | * note: AG size has been increased s.t. each k old contiguous AGs are | 2864 | * note: AG size has been increased s.t. each k old contiguous AGs are |
2868 | * coalesced into a new AG; | 2865 | * coalesced into a new AG; |
2869 | */ | 2866 | */ |
int diExtendFS(struct inode *ipimap, struct inode *ipbmap)
{
	int rc, rcx = 0;	/* rcx: first diIAGRead failure, reported at end */
	struct inomap *imap = JFS_IP(ipimap)->i_imap;
	struct iag *iagp = NULL, *hiagp = NULL;
	struct bmap *mp = JFS_SBI(ipbmap->i_sb)->bmap;
	struct metapage *bp, *hbp;
	int i, n, head;
	int numinos, xnuminos = 0, xnumfree = 0;
	s64 agstart;

	jfs_info("diExtendFS: nextiag:%d numinos:%d numfree:%d",
		 imap->im_nextiag, atomic_read(&imap->im_numinos),
		 atomic_read(&imap->im_numfree));

	/*
	 * reconstruct imap
	 *
	 * coalesce contiguous k (newAGSize/oldAGSize) AGs;
	 * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn;
	 * note: new AG size = old AG size * (2**x).
	 */

	/* init per AG control information im_agctl[] */
	for (i = 0; i < MAXAG; i++) {
		imap->im_agctl[i].inofree = -1;
		imap->im_agctl[i].extfree = -1;
		imap->im_agctl[i].numinos = 0;	/* number of backed inodes */
		imap->im_agctl[i].numfree = 0;	/* number of free backed inodes */
	}

	/*
	 * process each iag page of the map.
	 *
	 * rebuild AG Free Inode List, AG Free Inode Extent List;
	 */
	for (i = 0; i < imap->im_nextiag; i++) {
		/* a failed read is remembered in rcx but does not stop
		 * the rebuild of the remaining iags */
		if ((rc = diIAGRead(imap, i, &bp))) {
			rcx = rc;
			continue;
		}
		iagp = (struct iag *) bp->data;
		if (le32_to_cpu(iagp->iagnum) != i) {
			release_metapage(bp);
			jfs_error(ipimap->i_sb,
				  "diExtendFs: unexpected value of iagnum");
			return -EIO;
		}

		/* leave free iag in the free iag list */
		if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
			release_metapage(bp);
			continue;
		}

		/* agstart that computes to the same ag is treated as same; */
		agstart = le64_to_cpu(iagp->agstart);
		/* iagp->agstart = agstart & ~(mp->db_agsize - 1); */
		/* n: index of the (new, larger) AG this iag belongs to */
		n = agstart >> mp->db_agl2size;

		/* compute backed inodes */
		numinos = (EXTSPERIAG - le32_to_cpu(iagp->nfreeexts))
		    << L2INOSPEREXT;
		if (numinos > 0) {
			/* merge AG backed inodes */
			imap->im_agctl[n].numinos += numinos;
			xnuminos += numinos;
		}

		/* if any backed free inodes, insert at AG free inode list */
		if ((int) le32_to_cpu(iagp->nfreeinos) > 0) {
			if ((head = imap->im_agctl[n].inofree) == -1) {
				/* first iag on this AG's list */
				iagp->inofreefwd = cpu_to_le32(-1);
				iagp->inofreeback = cpu_to_le32(-1);
			} else {
				/* push this iag in front of the current head */
				if ((rc = diIAGRead(imap, head, &hbp))) {
					rcx = rc;
					goto nextiag;
				}
				hiagp = (struct iag *) hbp->data;
				/* both fields are little-endian: direct copy */
				hiagp->inofreeback = iagp->iagnum;
				iagp->inofreefwd = cpu_to_le32(head);
				iagp->inofreeback = cpu_to_le32(-1);
				write_metapage(hbp);
			}

			imap->im_agctl[n].inofree =
			    le32_to_cpu(iagp->iagnum);

			/* merge AG backed free inodes */
			imap->im_agctl[n].numfree +=
			    le32_to_cpu(iagp->nfreeinos);
			xnumfree += le32_to_cpu(iagp->nfreeinos);
		}

		/* if any free extents, insert at AG free extent list */
		if (le32_to_cpu(iagp->nfreeexts) > 0) {
			if ((head = imap->im_agctl[n].extfree) == -1) {
				/* first iag on this AG's list */
				iagp->extfreefwd = cpu_to_le32(-1);
				iagp->extfreeback = cpu_to_le32(-1);
			} else {
				/* push this iag in front of the current head */
				if ((rc = diIAGRead(imap, head, &hbp))) {
					rcx = rc;
					goto nextiag;
				}
				hiagp = (struct iag *) hbp->data;
				/* both fields are little-endian: direct copy */
				hiagp->extfreeback = iagp->iagnum;
				iagp->extfreefwd = cpu_to_le32(head);
				iagp->extfreeback = cpu_to_le32(-1);
				write_metapage(hbp);
			}

			imap->im_agctl[n].extfree =
			    le32_to_cpu(iagp->iagnum);
		}

	      nextiag:
		write_metapage(bp);
	}

	/* cross-check the rebuilt totals against the incore counters */
	if (xnuminos != atomic_read(&imap->im_numinos) ||
	    xnumfree != atomic_read(&imap->im_numfree)) {
		jfs_error(ipimap->i_sb,
			  "diExtendFs: numinos or numfree incorrect");
		return -EIO;
	}

	return rcx;
}
2999 | 2996 | ||
3000 | 2997 | ||
3001 | /* | 2998 | /* |
3002 | * duplicateIXtree() | 2999 | * duplicateIXtree() |
3003 | * | 3000 | * |
3004 | * serialization: IWRITE_LOCK held on entry/exit | 3001 | * serialization: IWRITE_LOCK held on entry/exit |
3005 | * | 3002 | * |
3006 | * note: shadow page with regular inode (rel.2); | 3003 | * note: shadow page with regular inode (rel.2); |
3007 | */ | 3004 | */ |
3008 | static void duplicateIXtree(struct super_block *sb, s64 blkno, | 3005 | static void duplicateIXtree(struct super_block *sb, s64 blkno, |
3009 | int xlen, s64 *xaddr) | 3006 | int xlen, s64 *xaddr) |
3010 | { | 3007 | { |
3011 | struct jfs_superblock *j_sb; | 3008 | struct jfs_superblock *j_sb; |
3012 | struct buffer_head *bh; | 3009 | struct buffer_head *bh; |
3013 | struct inode *ip; | 3010 | struct inode *ip; |
3014 | tid_t tid; | 3011 | tid_t tid; |
3015 | 3012 | ||
3016 | /* if AIT2 ipmap2 is bad, do not try to update it */ | 3013 | /* if AIT2 ipmap2 is bad, do not try to update it */ |
3017 | if (JFS_SBI(sb)->mntflag & JFS_BAD_SAIT) /* s_flag */ | 3014 | if (JFS_SBI(sb)->mntflag & JFS_BAD_SAIT) /* s_flag */ |
3018 | return; | 3015 | return; |
3019 | ip = diReadSpecial(sb, FILESYSTEM_I, 1); | 3016 | ip = diReadSpecial(sb, FILESYSTEM_I, 1); |
3020 | if (ip == NULL) { | 3017 | if (ip == NULL) { |
3021 | JFS_SBI(sb)->mntflag |= JFS_BAD_SAIT; | 3018 | JFS_SBI(sb)->mntflag |= JFS_BAD_SAIT; |
3022 | if (readSuper(sb, &bh)) | 3019 | if (readSuper(sb, &bh)) |
3023 | return; | 3020 | return; |
3024 | j_sb = (struct jfs_superblock *)bh->b_data; | 3021 | j_sb = (struct jfs_superblock *)bh->b_data; |
3025 | j_sb->s_flag |= cpu_to_le32(JFS_BAD_SAIT); | 3022 | j_sb->s_flag |= cpu_to_le32(JFS_BAD_SAIT); |
3026 | 3023 | ||
3027 | mark_buffer_dirty(bh); | 3024 | mark_buffer_dirty(bh); |
3028 | sync_dirty_buffer(bh); | 3025 | sync_dirty_buffer(bh); |
3029 | brelse(bh); | 3026 | brelse(bh); |
3030 | return; | 3027 | return; |
3031 | } | 3028 | } |
3032 | 3029 | ||
3033 | /* start transaction */ | 3030 | /* start transaction */ |
3034 | tid = txBegin(sb, COMMIT_FORCE); | 3031 | tid = txBegin(sb, COMMIT_FORCE); |
3035 | /* update the inode map addressing structure to point to it */ | 3032 | /* update the inode map addressing structure to point to it */ |
3036 | if (xtInsert(tid, ip, 0, blkno, xlen, xaddr, 0)) { | 3033 | if (xtInsert(tid, ip, 0, blkno, xlen, xaddr, 0)) { |
3037 | JFS_SBI(sb)->mntflag |= JFS_BAD_SAIT; | 3034 | JFS_SBI(sb)->mntflag |= JFS_BAD_SAIT; |
3038 | txAbort(tid, 1); | 3035 | txAbort(tid, 1); |
3039 | goto cleanup; | 3036 | goto cleanup; |
3040 | 3037 | ||
3041 | } | 3038 | } |
3042 | /* update the inode map's inode to reflect the extension */ | 3039 | /* update the inode map's inode to reflect the extension */ |
3043 | ip->i_size += PSIZE; | 3040 | ip->i_size += PSIZE; |
3044 | inode_add_bytes(ip, PSIZE); | 3041 | inode_add_bytes(ip, PSIZE); |
3045 | txCommit(tid, 1, &ip, COMMIT_FORCE); | 3042 | txCommit(tid, 1, &ip, COMMIT_FORCE); |
3046 | cleanup: | 3043 | cleanup: |
3047 | txEnd(tid); | 3044 | txEnd(tid); |
3048 | diFreeSpecial(ip); | 3045 | diFreeSpecial(ip); |
3049 | } | 3046 | } |
3050 | 3047 | ||
3051 | /* | 3048 | /* |
3052 | * NAME: copy_from_dinode() | 3049 | * NAME: copy_from_dinode() |
3053 | * | 3050 | * |
3054 | * FUNCTION: Copies inode info from disk inode to in-memory inode | 3051 | * FUNCTION: Copies inode info from disk inode to in-memory inode |
3055 | * | 3052 | * |
3056 | * RETURN VALUES: | 3053 | * RETURN VALUES: |
3057 | * 0 - success | 3054 | * 0 - success |
3058 | * -ENOMEM - insufficient memory | 3055 | * -ENOMEM - insufficient memory |
3059 | */ | 3056 | */ |
3060 | static int copy_from_dinode(struct dinode * dip, struct inode *ip) | 3057 | static int copy_from_dinode(struct dinode * dip, struct inode *ip) |
3061 | { | 3058 | { |
3062 | struct jfs_inode_info *jfs_ip = JFS_IP(ip); | 3059 | struct jfs_inode_info *jfs_ip = JFS_IP(ip); |
3063 | struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); | 3060 | struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); |
3064 | 3061 | ||
3065 | jfs_ip->fileset = le32_to_cpu(dip->di_fileset); | 3062 | jfs_ip->fileset = le32_to_cpu(dip->di_fileset); |
3066 | jfs_ip->mode2 = le32_to_cpu(dip->di_mode); | 3063 | jfs_ip->mode2 = le32_to_cpu(dip->di_mode); |
3067 | jfs_set_inode_flags(ip); | 3064 | jfs_set_inode_flags(ip); |
3068 | 3065 | ||
3069 | ip->i_mode = le32_to_cpu(dip->di_mode) & 0xffff; | 3066 | ip->i_mode = le32_to_cpu(dip->di_mode) & 0xffff; |
3070 | if (sbi->umask != -1) { | 3067 | if (sbi->umask != -1) { |
3071 | ip->i_mode = (ip->i_mode & ~0777) | (0777 & ~sbi->umask); | 3068 | ip->i_mode = (ip->i_mode & ~0777) | (0777 & ~sbi->umask); |
3072 | /* For directories, add x permission if r is allowed by umask */ | 3069 | /* For directories, add x permission if r is allowed by umask */ |
3073 | if (S_ISDIR(ip->i_mode)) { | 3070 | if (S_ISDIR(ip->i_mode)) { |
3074 | if (ip->i_mode & 0400) | 3071 | if (ip->i_mode & 0400) |
3075 | ip->i_mode |= 0100; | 3072 | ip->i_mode |= 0100; |
3076 | if (ip->i_mode & 0040) | 3073 | if (ip->i_mode & 0040) |
3077 | ip->i_mode |= 0010; | 3074 | ip->i_mode |= 0010; |
3078 | if (ip->i_mode & 0004) | 3075 | if (ip->i_mode & 0004) |
3079 | ip->i_mode |= 0001; | 3076 | ip->i_mode |= 0001; |
3080 | } | 3077 | } |
3081 | } | 3078 | } |
3082 | ip->i_nlink = le32_to_cpu(dip->di_nlink); | 3079 | ip->i_nlink = le32_to_cpu(dip->di_nlink); |
3083 | 3080 | ||
3084 | jfs_ip->saved_uid = le32_to_cpu(dip->di_uid); | 3081 | jfs_ip->saved_uid = le32_to_cpu(dip->di_uid); |
3085 | if (sbi->uid == -1) | 3082 | if (sbi->uid == -1) |
3086 | ip->i_uid = jfs_ip->saved_uid; | 3083 | ip->i_uid = jfs_ip->saved_uid; |
3087 | else { | 3084 | else { |
3088 | ip->i_uid = sbi->uid; | 3085 | ip->i_uid = sbi->uid; |
3089 | } | 3086 | } |
3090 | 3087 | ||
3091 | jfs_ip->saved_gid = le32_to_cpu(dip->di_gid); | 3088 | jfs_ip->saved_gid = le32_to_cpu(dip->di_gid); |
3092 | if (sbi->gid == -1) | 3089 | if (sbi->gid == -1) |
3093 | ip->i_gid = jfs_ip->saved_gid; | 3090 | ip->i_gid = jfs_ip->saved_gid; |
3094 | else { | 3091 | else { |
3095 | ip->i_gid = sbi->gid; | 3092 | ip->i_gid = sbi->gid; |
3096 | } | 3093 | } |
3097 | 3094 | ||
3098 | ip->i_size = le64_to_cpu(dip->di_size); | 3095 | ip->i_size = le64_to_cpu(dip->di_size); |
3099 | ip->i_atime.tv_sec = le32_to_cpu(dip->di_atime.tv_sec); | 3096 | ip->i_atime.tv_sec = le32_to_cpu(dip->di_atime.tv_sec); |
3100 | ip->i_atime.tv_nsec = le32_to_cpu(dip->di_atime.tv_nsec); | 3097 | ip->i_atime.tv_nsec = le32_to_cpu(dip->di_atime.tv_nsec); |
3101 | ip->i_mtime.tv_sec = le32_to_cpu(dip->di_mtime.tv_sec); | 3098 | ip->i_mtime.tv_sec = le32_to_cpu(dip->di_mtime.tv_sec); |
3102 | ip->i_mtime.tv_nsec = le32_to_cpu(dip->di_mtime.tv_nsec); | 3099 | ip->i_mtime.tv_nsec = le32_to_cpu(dip->di_mtime.tv_nsec); |
3103 | ip->i_ctime.tv_sec = le32_to_cpu(dip->di_ctime.tv_sec); | 3100 | ip->i_ctime.tv_sec = le32_to_cpu(dip->di_ctime.tv_sec); |
3104 | ip->i_ctime.tv_nsec = le32_to_cpu(dip->di_ctime.tv_nsec); | 3101 | ip->i_ctime.tv_nsec = le32_to_cpu(dip->di_ctime.tv_nsec); |
3105 | ip->i_blocks = LBLK2PBLK(ip->i_sb, le64_to_cpu(dip->di_nblocks)); | 3102 | ip->i_blocks = LBLK2PBLK(ip->i_sb, le64_to_cpu(dip->di_nblocks)); |
3106 | ip->i_generation = le32_to_cpu(dip->di_gen); | 3103 | ip->i_generation = le32_to_cpu(dip->di_gen); |
3107 | 3104 | ||
3108 | jfs_ip->ixpxd = dip->di_ixpxd; /* in-memory pxd's are little-endian */ | 3105 | jfs_ip->ixpxd = dip->di_ixpxd; /* in-memory pxd's are little-endian */ |
3109 | jfs_ip->acl = dip->di_acl; /* as are dxd's */ | 3106 | jfs_ip->acl = dip->di_acl; /* as are dxd's */ |
3110 | jfs_ip->ea = dip->di_ea; | 3107 | jfs_ip->ea = dip->di_ea; |
3111 | jfs_ip->next_index = le32_to_cpu(dip->di_next_index); | 3108 | jfs_ip->next_index = le32_to_cpu(dip->di_next_index); |
3112 | jfs_ip->otime = le32_to_cpu(dip->di_otime.tv_sec); | 3109 | jfs_ip->otime = le32_to_cpu(dip->di_otime.tv_sec); |
3113 | jfs_ip->acltype = le32_to_cpu(dip->di_acltype); | 3110 | jfs_ip->acltype = le32_to_cpu(dip->di_acltype); |
3114 | 3111 | ||
3115 | if (S_ISCHR(ip->i_mode) || S_ISBLK(ip->i_mode)) { | 3112 | if (S_ISCHR(ip->i_mode) || S_ISBLK(ip->i_mode)) { |
3116 | jfs_ip->dev = le32_to_cpu(dip->di_rdev); | 3113 | jfs_ip->dev = le32_to_cpu(dip->di_rdev); |
3117 | ip->i_rdev = new_decode_dev(jfs_ip->dev); | 3114 | ip->i_rdev = new_decode_dev(jfs_ip->dev); |
3118 | } | 3115 | } |
3119 | 3116 | ||
3120 | if (S_ISDIR(ip->i_mode)) { | 3117 | if (S_ISDIR(ip->i_mode)) { |
3121 | memcpy(&jfs_ip->i_dirtable, &dip->di_dirtable, 384); | 3118 | memcpy(&jfs_ip->i_dirtable, &dip->di_dirtable, 384); |
3122 | } else if (S_ISREG(ip->i_mode) || S_ISLNK(ip->i_mode)) { | 3119 | } else if (S_ISREG(ip->i_mode) || S_ISLNK(ip->i_mode)) { |
3123 | memcpy(&jfs_ip->i_xtroot, &dip->di_xtroot, 288); | 3120 | memcpy(&jfs_ip->i_xtroot, &dip->di_xtroot, 288); |
3124 | } else | 3121 | } else |
3125 | memcpy(&jfs_ip->i_inline_ea, &dip->di_inlineea, 128); | 3122 | memcpy(&jfs_ip->i_inline_ea, &dip->di_inlineea, 128); |
3126 | 3123 | ||
3127 | /* Zero the in-memory-only stuff */ | 3124 | /* Zero the in-memory-only stuff */ |
3128 | jfs_ip->cflag = 0; | 3125 | jfs_ip->cflag = 0; |
3129 | jfs_ip->btindex = 0; | 3126 | jfs_ip->btindex = 0; |
3130 | jfs_ip->btorder = 0; | 3127 | jfs_ip->btorder = 0; |
3131 | jfs_ip->bxflag = 0; | 3128 | jfs_ip->bxflag = 0; |
3132 | jfs_ip->blid = 0; | 3129 | jfs_ip->blid = 0; |
3133 | jfs_ip->atlhead = 0; | 3130 | jfs_ip->atlhead = 0; |
3134 | jfs_ip->atltail = 0; | 3131 | jfs_ip->atltail = 0; |
3135 | jfs_ip->xtlid = 0; | 3132 | jfs_ip->xtlid = 0; |
3136 | return (0); | 3133 | return (0); |
3137 | } | 3134 | } |
3138 | 3135 | ||
3139 | /* | 3136 | /* |
3140 | * NAME: copy_to_dinode() | 3137 | * NAME: copy_to_dinode() |
3141 | * | 3138 | * |
3142 | * FUNCTION: Copies inode info from in-memory inode to disk inode | 3139 | * FUNCTION: Copies inode info from in-memory inode to disk inode |
3143 | */ | 3140 | */ |
3144 | static void copy_to_dinode(struct dinode * dip, struct inode *ip) | 3141 | static void copy_to_dinode(struct dinode * dip, struct inode *ip) |
3145 | { | 3142 | { |
3146 | struct jfs_inode_info *jfs_ip = JFS_IP(ip); | 3143 | struct jfs_inode_info *jfs_ip = JFS_IP(ip); |
3147 | struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); | 3144 | struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); |
3148 | 3145 | ||
3149 | dip->di_fileset = cpu_to_le32(jfs_ip->fileset); | 3146 | dip->di_fileset = cpu_to_le32(jfs_ip->fileset); |
3150 | dip->di_inostamp = cpu_to_le32(sbi->inostamp); | 3147 | dip->di_inostamp = cpu_to_le32(sbi->inostamp); |
3151 | dip->di_number = cpu_to_le32(ip->i_ino); | 3148 | dip->di_number = cpu_to_le32(ip->i_ino); |
3152 | dip->di_gen = cpu_to_le32(ip->i_generation); | 3149 | dip->di_gen = cpu_to_le32(ip->i_generation); |
3153 | dip->di_size = cpu_to_le64(ip->i_size); | 3150 | dip->di_size = cpu_to_le64(ip->i_size); |
3154 | dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks)); | 3151 | dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks)); |
3155 | dip->di_nlink = cpu_to_le32(ip->i_nlink); | 3152 | dip->di_nlink = cpu_to_le32(ip->i_nlink); |
3156 | if (sbi->uid == -1) | 3153 | if (sbi->uid == -1) |
3157 | dip->di_uid = cpu_to_le32(ip->i_uid); | 3154 | dip->di_uid = cpu_to_le32(ip->i_uid); |
3158 | else | 3155 | else |
3159 | dip->di_uid = cpu_to_le32(jfs_ip->saved_uid); | 3156 | dip->di_uid = cpu_to_le32(jfs_ip->saved_uid); |
3160 | if (sbi->gid == -1) | 3157 | if (sbi->gid == -1) |
3161 | dip->di_gid = cpu_to_le32(ip->i_gid); | 3158 | dip->di_gid = cpu_to_le32(ip->i_gid); |
3162 | else | 3159 | else |
3163 | dip->di_gid = cpu_to_le32(jfs_ip->saved_gid); | 3160 | dip->di_gid = cpu_to_le32(jfs_ip->saved_gid); |
3164 | jfs_get_inode_flags(jfs_ip); | 3161 | jfs_get_inode_flags(jfs_ip); |
3165 | /* | 3162 | /* |
3166 | * mode2 is only needed for storing the higher order bits. | 3163 | * mode2 is only needed for storing the higher order bits. |
3167 | * Trust i_mode for the lower order ones | 3164 | * Trust i_mode for the lower order ones |
3168 | */ | 3165 | */ |
3169 | if (sbi->umask == -1) | 3166 | if (sbi->umask == -1) |
3170 | dip->di_mode = cpu_to_le32((jfs_ip->mode2 & 0xffff0000) | | 3167 | dip->di_mode = cpu_to_le32((jfs_ip->mode2 & 0xffff0000) | |
3171 | ip->i_mode); | 3168 | ip->i_mode); |
3172 | else /* Leave the original permissions alone */ | 3169 | else /* Leave the original permissions alone */ |
3173 | dip->di_mode = cpu_to_le32(jfs_ip->mode2); | 3170 | dip->di_mode = cpu_to_le32(jfs_ip->mode2); |
3174 | 3171 | ||
3175 | dip->di_atime.tv_sec = cpu_to_le32(ip->i_atime.tv_sec); | 3172 | dip->di_atime.tv_sec = cpu_to_le32(ip->i_atime.tv_sec); |
3176 | dip->di_atime.tv_nsec = cpu_to_le32(ip->i_atime.tv_nsec); | 3173 | dip->di_atime.tv_nsec = cpu_to_le32(ip->i_atime.tv_nsec); |
3177 | dip->di_ctime.tv_sec = cpu_to_le32(ip->i_ctime.tv_sec); | 3174 | dip->di_ctime.tv_sec = cpu_to_le32(ip->i_ctime.tv_sec); |
3178 | dip->di_ctime.tv_nsec = cpu_to_le32(ip->i_ctime.tv_nsec); | 3175 | dip->di_ctime.tv_nsec = cpu_to_le32(ip->i_ctime.tv_nsec); |
3179 | dip->di_mtime.tv_sec = cpu_to_le32(ip->i_mtime.tv_sec); | 3176 | dip->di_mtime.tv_sec = cpu_to_le32(ip->i_mtime.tv_sec); |
3180 | dip->di_mtime.tv_nsec = cpu_to_le32(ip->i_mtime.tv_nsec); | 3177 | dip->di_mtime.tv_nsec = cpu_to_le32(ip->i_mtime.tv_nsec); |
3181 | dip->di_ixpxd = jfs_ip->ixpxd; /* in-memory pxd's are little-endian */ | 3178 | dip->di_ixpxd = jfs_ip->ixpxd; /* in-memory pxd's are little-endian */ |
3182 | dip->di_acl = jfs_ip->acl; /* as are dxd's */ | 3179 | dip->di_acl = jfs_ip->acl; /* as are dxd's */ |
3183 | dip->di_ea = jfs_ip->ea; | 3180 | dip->di_ea = jfs_ip->ea; |
3184 | dip->di_next_index = cpu_to_le32(jfs_ip->next_index); | 3181 | dip->di_next_index = cpu_to_le32(jfs_ip->next_index); |
3185 | dip->di_otime.tv_sec = cpu_to_le32(jfs_ip->otime); | 3182 | dip->di_otime.tv_sec = cpu_to_le32(jfs_ip->otime); |
3186 | dip->di_otime.tv_nsec = 0; | 3183 | dip->di_otime.tv_nsec = 0; |
3187 | dip->di_acltype = cpu_to_le32(jfs_ip->acltype); | 3184 | dip->di_acltype = cpu_to_le32(jfs_ip->acltype); |
3188 | if (S_ISCHR(ip->i_mode) || S_ISBLK(ip->i_mode)) | 3185 | if (S_ISCHR(ip->i_mode) || S_ISBLK(ip->i_mode)) |
3189 | dip->di_rdev = cpu_to_le32(jfs_ip->dev); | 3186 | dip->di_rdev = cpu_to_le32(jfs_ip->dev); |
3190 | } | 3187 | } |
3191 | 3188 |
fs/jfs/jfs_xtree.c
1 | /* | 1 | /* |
2 | * Copyright (C) International Business Machines Corp., 2000-2005 | 2 | * Copyright (C) International Business Machines Corp., 2000-2005 |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify | 4 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License as published by | 5 | * it under the terms of the GNU General Public License as published by |
6 | * the Free Software Foundation; either version 2 of the License, or | 6 | * the Free Software Foundation; either version 2 of the License, or |
7 | * (at your option) any later version. | 7 | * (at your option) any later version. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it will be useful, | 9 | * This program is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
12 | * the GNU General Public License for more details. | 12 | * the GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write to the Free Software | 15 | * along with this program; if not, write to the Free Software |
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
17 | */ | 17 | */ |
18 | /* | 18 | /* |
19 | * jfs_xtree.c: extent allocation descriptor B+-tree manager | 19 | * jfs_xtree.c: extent allocation descriptor B+-tree manager |
20 | */ | 20 | */ |
21 | 21 | ||
22 | #include <linux/fs.h> | 22 | #include <linux/fs.h> |
23 | #include <linux/quotaops.h> | 23 | #include <linux/quotaops.h> |
24 | #include "jfs_incore.h" | 24 | #include "jfs_incore.h" |
25 | #include "jfs_filsys.h" | 25 | #include "jfs_filsys.h" |
26 | #include "jfs_metapage.h" | 26 | #include "jfs_metapage.h" |
27 | #include "jfs_dmap.h" | 27 | #include "jfs_dmap.h" |
28 | #include "jfs_dinode.h" | 28 | #include "jfs_dinode.h" |
29 | #include "jfs_superblock.h" | 29 | #include "jfs_superblock.h" |
30 | #include "jfs_debug.h" | 30 | #include "jfs_debug.h" |
31 | 31 | ||
32 | /* | 32 | /* |
33 | * xtree local flag | 33 | * xtree local flag |
34 | */ | 34 | */ |
35 | #define XT_INSERT 0x00000001 | 35 | #define XT_INSERT 0x00000001 |
36 | 36 | ||
37 | /* | 37 | /* |
38 | * xtree key/entry comparison: extent offset | 38 | * xtree key/entry comparison: extent offset |
39 | * | 39 | * |
40 | * return: | 40 | * return: |
41 | * -1: k < start of extent | 41 | * -1: k < start of extent |
42 | * 0: start_of_extent <= k <= end_of_extent | 42 | * 0: start_of_extent <= k <= end_of_extent |
43 | * 1: k > end_of_extent | 43 | * 1: k > end_of_extent |
44 | */ | 44 | */ |
45 | #define XT_CMP(CMP, K, X, OFFSET64)\ | 45 | #define XT_CMP(CMP, K, X, OFFSET64)\ |
46 | {\ | 46 | {\ |
47 | OFFSET64 = offsetXAD(X);\ | 47 | OFFSET64 = offsetXAD(X);\ |
48 | (CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\ | 48 | (CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\ |
49 | ((K) < OFFSET64) ? -1 : 0;\ | 49 | ((K) < OFFSET64) ? -1 : 0;\ |
50 | } | 50 | } |
51 | 51 | ||
52 | /* write a xad entry */ | 52 | /* write a xad entry */ |
53 | #define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\ | 53 | #define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\ |
54 | {\ | 54 | {\ |
55 | (XAD)->flag = (FLAG);\ | 55 | (XAD)->flag = (FLAG);\ |
56 | XADoffset((XAD), (OFF));\ | 56 | XADoffset((XAD), (OFF));\ |
57 | XADlength((XAD), (LEN));\ | 57 | XADlength((XAD), (LEN));\ |
58 | XADaddress((XAD), (ADDR));\ | 58 | XADaddress((XAD), (ADDR));\ |
59 | } | 59 | } |
60 | 60 | ||
61 | #define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot) | 61 | #define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot) |
62 | 62 | ||
63 | /* get page buffer for specified block address */ | 63 | /* get page buffer for specified block address */ |
64 | /* ToDo: Replace this ugly macro with a function */ | 64 | /* ToDo: Replace this ugly macro with a function */ |
65 | #define XT_GETPAGE(IP, BN, MP, SIZE, P, RC)\ | 65 | #define XT_GETPAGE(IP, BN, MP, SIZE, P, RC)\ |
66 | {\ | 66 | {\ |
67 | BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot)\ | 67 | BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot)\ |
68 | if (!(RC))\ | 68 | if (!(RC))\ |
69 | {\ | 69 | {\ |
70 | if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) ||\ | 70 | if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) ||\ |
71 | (le16_to_cpu((P)->header.nextindex) > le16_to_cpu((P)->header.maxentry)) ||\ | 71 | (le16_to_cpu((P)->header.nextindex) > le16_to_cpu((P)->header.maxentry)) ||\ |
72 | (le16_to_cpu((P)->header.maxentry) > (((BN)==0)?XTROOTMAXSLOT:PSIZE>>L2XTSLOTSIZE)))\ | 72 | (le16_to_cpu((P)->header.maxentry) > (((BN)==0)?XTROOTMAXSLOT:PSIZE>>L2XTSLOTSIZE)))\ |
73 | {\ | 73 | {\ |
74 | jfs_error((IP)->i_sb, "XT_GETPAGE: xtree page corrupt");\ | 74 | jfs_error((IP)->i_sb, "XT_GETPAGE: xtree page corrupt");\ |
75 | BT_PUTPAGE(MP);\ | 75 | BT_PUTPAGE(MP);\ |
76 | MP = NULL;\ | 76 | MP = NULL;\ |
77 | RC = -EIO;\ | 77 | RC = -EIO;\ |
78 | }\ | 78 | }\ |
79 | }\ | 79 | }\ |
80 | } | 80 | } |
81 | 81 | ||
82 | /* for consistency */ | 82 | /* for consistency */ |
83 | #define XT_PUTPAGE(MP) BT_PUTPAGE(MP) | 83 | #define XT_PUTPAGE(MP) BT_PUTPAGE(MP) |
84 | 84 | ||
85 | #define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \ | 85 | #define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \ |
86 | BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot) | 86 | BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot) |
87 | /* xtree entry parameter descriptor */ | 87 | /* xtree entry parameter descriptor */ |
88 | struct xtsplit { | 88 | struct xtsplit { |
89 | struct metapage *mp; | 89 | struct metapage *mp; |
90 | s16 index; | 90 | s16 index; |
91 | u8 flag; | 91 | u8 flag; |
92 | s64 off; | 92 | s64 off; |
93 | s64 addr; | 93 | s64 addr; |
94 | int len; | 94 | int len; |
95 | struct pxdlist *pxdlist; | 95 | struct pxdlist *pxdlist; |
96 | }; | 96 | }; |
97 | 97 | ||
98 | 98 | ||
99 | /* | 99 | /* |
100 | * statistics | 100 | * statistics |
101 | */ | 101 | */ |
102 | #ifdef CONFIG_JFS_STATISTICS | 102 | #ifdef CONFIG_JFS_STATISTICS |
103 | static struct { | 103 | static struct { |
104 | uint search; | 104 | uint search; |
105 | uint fastSearch; | 105 | uint fastSearch; |
106 | uint split; | 106 | uint split; |
107 | } xtStat; | 107 | } xtStat; |
108 | #endif | 108 | #endif |
109 | 109 | ||
110 | 110 | ||
111 | /* | 111 | /* |
112 | * forward references | 112 | * forward references |
113 | */ | 113 | */ |
114 | static int xtSearch(struct inode *ip, s64 xoff, s64 *next, int *cmpp, | 114 | static int xtSearch(struct inode *ip, s64 xoff, s64 *next, int *cmpp, |
115 | struct btstack * btstack, int flag); | 115 | struct btstack * btstack, int flag); |
116 | 116 | ||
117 | static int xtSplitUp(tid_t tid, | 117 | static int xtSplitUp(tid_t tid, |
118 | struct inode *ip, | 118 | struct inode *ip, |
119 | struct xtsplit * split, struct btstack * btstack); | 119 | struct xtsplit * split, struct btstack * btstack); |
120 | 120 | ||
121 | static int xtSplitPage(tid_t tid, struct inode *ip, struct xtsplit * split, | 121 | static int xtSplitPage(tid_t tid, struct inode *ip, struct xtsplit * split, |
122 | struct metapage ** rmpp, s64 * rbnp); | 122 | struct metapage ** rmpp, s64 * rbnp); |
123 | 123 | ||
124 | static int xtSplitRoot(tid_t tid, struct inode *ip, | 124 | static int xtSplitRoot(tid_t tid, struct inode *ip, |
125 | struct xtsplit * split, struct metapage ** rmpp); | 125 | struct xtsplit * split, struct metapage ** rmpp); |
126 | 126 | ||
127 | #ifdef _STILL_TO_PORT | 127 | #ifdef _STILL_TO_PORT |
128 | static int xtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp, | 128 | static int xtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp, |
129 | xtpage_t * fp, struct btstack * btstack); | 129 | xtpage_t * fp, struct btstack * btstack); |
130 | 130 | ||
131 | static int xtSearchNode(struct inode *ip, | 131 | static int xtSearchNode(struct inode *ip, |
132 | xad_t * xad, | 132 | xad_t * xad, |
133 | int *cmpp, struct btstack * btstack, int flag); | 133 | int *cmpp, struct btstack * btstack, int flag); |
134 | 134 | ||
135 | static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * fp); | 135 | static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * fp); |
136 | #endif /* _STILL_TO_PORT */ | 136 | #endif /* _STILL_TO_PORT */ |
137 | 137 | ||
138 | /* | 138 | /* |
139 | * xtLookup() | 139 | * xtLookup() |
140 | * | 140 | * |
141 | * function: map a single page into a physical extent; | 141 | * function: map a single page into a physical extent; |
142 | */ | 142 | */ |
143 | int xtLookup(struct inode *ip, s64 lstart, | 143 | int xtLookup(struct inode *ip, s64 lstart, |
144 | s64 llen, int *pflag, s64 * paddr, s32 * plen, int no_check) | 144 | s64 llen, int *pflag, s64 * paddr, s32 * plen, int no_check) |
145 | { | 145 | { |
146 | int rc = 0; | 146 | int rc = 0; |
147 | struct btstack btstack; | 147 | struct btstack btstack; |
148 | int cmp; | 148 | int cmp; |
149 | s64 bn; | 149 | s64 bn; |
150 | struct metapage *mp; | 150 | struct metapage *mp; |
151 | xtpage_t *p; | 151 | xtpage_t *p; |
152 | int index; | 152 | int index; |
153 | xad_t *xad; | 153 | xad_t *xad; |
154 | s64 next, size, xoff, xend; | 154 | s64 next, size, xoff, xend; |
155 | int xlen; | 155 | int xlen; |
156 | s64 xaddr; | 156 | s64 xaddr; |
157 | 157 | ||
158 | *paddr = 0; | 158 | *paddr = 0; |
159 | *plen = llen; | 159 | *plen = llen; |
160 | 160 | ||
161 | if (!no_check) { | 161 | if (!no_check) { |
162 | /* is lookup offset beyond eof ? */ | 162 | /* is lookup offset beyond eof ? */ |
163 | size = ((u64) ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >> | 163 | size = ((u64) ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >> |
164 | JFS_SBI(ip->i_sb)->l2bsize; | 164 | JFS_SBI(ip->i_sb)->l2bsize; |
165 | if (lstart >= size) { | 165 | if (lstart >= size) { |
166 | jfs_err("xtLookup: lstart (0x%lx) >= size (0x%lx)", | 166 | jfs_err("xtLookup: lstart (0x%lx) >= size (0x%lx)", |
167 | (ulong) lstart, (ulong) size); | 167 | (ulong) lstart, (ulong) size); |
168 | return 0; | 168 | return 0; |
169 | } | 169 | } |
170 | } | 170 | } |
171 | 171 | ||
172 | /* | 172 | /* |
173 | * search for the xad entry covering the logical extent | 173 | * search for the xad entry covering the logical extent |
174 | */ | 174 | */ |
175 | //search: | 175 | //search: |
176 | if ((rc = xtSearch(ip, lstart, &next, &cmp, &btstack, 0))) { | 176 | if ((rc = xtSearch(ip, lstart, &next, &cmp, &btstack, 0))) { |
177 | jfs_err("xtLookup: xtSearch returned %d", rc); | 177 | jfs_err("xtLookup: xtSearch returned %d", rc); |
178 | return rc; | 178 | return rc; |
179 | } | 179 | } |
180 | 180 | ||
181 | /* | 181 | /* |
182 | * compute the physical extent covering logical extent | 182 | * compute the physical extent covering logical extent |
183 | * | 183 | * |
184 | * N.B. search may have failed (e.g., hole in sparse file), | 184 | * N.B. search may have failed (e.g., hole in sparse file), |
185 | * and returned the index of the next entry. | 185 | * and returned the index of the next entry. |
186 | */ | 186 | */ |
187 | /* retrieve search result */ | 187 | /* retrieve search result */ |
188 | XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); | 188 | XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); |
189 | 189 | ||
190 | /* is xad found covering start of logical extent ? | 190 | /* is xad found covering start of logical extent ? |
191 | * lstart is a page start address, | 191 | * lstart is a page start address, |
192 | * i.e., lstart cannot start in a hole; | 192 | * i.e., lstart cannot start in a hole; |
193 | */ | 193 | */ |
194 | if (cmp) { | 194 | if (cmp) { |
195 | if (next) | 195 | if (next) |
196 | *plen = min(next - lstart, llen); | 196 | *plen = min(next - lstart, llen); |
197 | goto out; | 197 | goto out; |
198 | } | 198 | } |
199 | 199 | ||
200 | /* | 200 | /* |
201 | * lxd covered by xad | 201 | * lxd covered by xad |
202 | */ | 202 | */ |
203 | xad = &p->xad[index]; | 203 | xad = &p->xad[index]; |
204 | xoff = offsetXAD(xad); | 204 | xoff = offsetXAD(xad); |
205 | xlen = lengthXAD(xad); | 205 | xlen = lengthXAD(xad); |
206 | xend = xoff + xlen; | 206 | xend = xoff + xlen; |
207 | xaddr = addressXAD(xad); | 207 | xaddr = addressXAD(xad); |
208 | 208 | ||
209 | /* initialize new pxd */ | 209 | /* initialize new pxd */ |
210 | *pflag = xad->flag; | 210 | *pflag = xad->flag; |
211 | *paddr = xaddr + (lstart - xoff); | 211 | *paddr = xaddr + (lstart - xoff); |
212 | /* a page must be fully covered by an xad */ | 212 | /* a page must be fully covered by an xad */ |
213 | *plen = min(xend - lstart, llen); | 213 | *plen = min(xend - lstart, llen); |
214 | 214 | ||
215 | out: | 215 | out: |
216 | XT_PUTPAGE(mp); | 216 | XT_PUTPAGE(mp); |
217 | 217 | ||
218 | return rc; | 218 | return rc; |
219 | } | 219 | } |
220 | 220 | ||
221 | 221 | ||
222 | /* | 222 | /* |
223 | * xtLookupList() | 223 | * xtLookupList() |
224 | * | 224 | * |
225 | * function: map a single logical extent into a list of physical extent; | 225 | * function: map a single logical extent into a list of physical extent; |
226 | * | 226 | * |
227 | * parameter: | 227 | * parameter: |
228 | * struct inode *ip, | 228 | * struct inode *ip, |
229 | * struct lxdlist *lxdlist, lxd list (in) | 229 | * struct lxdlist *lxdlist, lxd list (in) |
230 | * struct xadlist *xadlist, xad list (in/out) | 230 | * struct xadlist *xadlist, xad list (in/out) |
231 | * int flag) | 231 | * int flag) |
232 | * | 232 | * |
233 | * coverage of lxd by xad under assumption of | 233 | * coverage of lxd by xad under assumption of |
234 | * . lxd's are ordered and disjoint. | 234 | * . lxd's are ordered and disjoint. |
235 | * . xad's are ordered and disjoint. | 235 | * . xad's are ordered and disjoint. |
236 | * | 236 | * |
237 | * return: | 237 | * return: |
238 | * 0: success | 238 | * 0: success |
239 | * | 239 | * |
240 | * note: a page being written (even a single byte) is backed fully, | 240 | * note: a page being written (even a single byte) is backed fully, |
241 | * except the last page which is only backed with blocks | 241 | * except the last page which is only backed with blocks |
242 | * required to cover the last byte; | 242 | * required to cover the last byte; |
243 | * the extent backing a page is fully contained within an xad; | 243 | * the extent backing a page is fully contained within an xad; |
244 | */ | 244 | */ |
245 | int xtLookupList(struct inode *ip, struct lxdlist * lxdlist, | 245 | int xtLookupList(struct inode *ip, struct lxdlist * lxdlist, |
246 | struct xadlist * xadlist, int flag) | 246 | struct xadlist * xadlist, int flag) |
247 | { | 247 | { |
248 | int rc = 0; | 248 | int rc = 0; |
249 | struct btstack btstack; | 249 | struct btstack btstack; |
250 | int cmp; | 250 | int cmp; |
251 | s64 bn; | 251 | s64 bn; |
252 | struct metapage *mp; | 252 | struct metapage *mp; |
253 | xtpage_t *p; | 253 | xtpage_t *p; |
254 | int index; | 254 | int index; |
255 | lxd_t *lxd; | 255 | lxd_t *lxd; |
256 | xad_t *xad, *pxd; | 256 | xad_t *xad, *pxd; |
257 | s64 size, lstart, lend, xstart, xend, pstart; | 257 | s64 size, lstart, lend, xstart, xend, pstart; |
258 | s64 llen, xlen, plen; | 258 | s64 llen, xlen, plen; |
259 | s64 xaddr, paddr; | 259 | s64 xaddr, paddr; |
260 | int nlxd, npxd, maxnpxd; | 260 | int nlxd, npxd, maxnpxd; |
261 | 261 | ||
262 | npxd = xadlist->nxad = 0; | 262 | npxd = xadlist->nxad = 0; |
263 | maxnpxd = xadlist->maxnxad; | 263 | maxnpxd = xadlist->maxnxad; |
264 | pxd = xadlist->xad; | 264 | pxd = xadlist->xad; |
265 | 265 | ||
266 | nlxd = lxdlist->nlxd; | 266 | nlxd = lxdlist->nlxd; |
267 | lxd = lxdlist->lxd; | 267 | lxd = lxdlist->lxd; |
268 | 268 | ||
269 | lstart = offsetLXD(lxd); | 269 | lstart = offsetLXD(lxd); |
270 | llen = lengthLXD(lxd); | 270 | llen = lengthLXD(lxd); |
271 | lend = lstart + llen; | 271 | lend = lstart + llen; |
272 | 272 | ||
273 | size = (ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >> | 273 | size = (ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >> |
274 | JFS_SBI(ip->i_sb)->l2bsize; | 274 | JFS_SBI(ip->i_sb)->l2bsize; |
275 | 275 | ||
276 | /* | 276 | /* |
277 | * search for the xad entry covering the logical extent | 277 | * search for the xad entry covering the logical extent |
278 | */ | 278 | */ |
279 | search: | 279 | search: |
280 | if (lstart >= size) | 280 | if (lstart >= size) |
281 | return 0; | 281 | return 0; |
282 | 282 | ||
283 | if ((rc = xtSearch(ip, lstart, NULL, &cmp, &btstack, 0))) | 283 | if ((rc = xtSearch(ip, lstart, NULL, &cmp, &btstack, 0))) |
284 | return rc; | 284 | return rc; |
285 | 285 | ||
286 | /* | 286 | /* |
287 | * compute the physical extent covering logical extent | 287 | * compute the physical extent covering logical extent |
288 | * | 288 | * |
289 | * N.B. search may have failed (e.g., hole in sparse file), | 289 | * N.B. search may have failed (e.g., hole in sparse file), |
290 | * and returned the index of the next entry. | 290 | * and returned the index of the next entry. |
291 | */ | 291 | */ |
292 | //map: | 292 | //map: |
293 | /* retrieve search result */ | 293 | /* retrieve search result */ |
294 | XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); | 294 | XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); |
295 | 295 | ||
296 | /* is xad on the next sibling page ? */ | 296 | /* is xad on the next sibling page ? */ |
297 | if (index == le16_to_cpu(p->header.nextindex)) { | 297 | if (index == le16_to_cpu(p->header.nextindex)) { |
298 | if (p->header.flag & BT_ROOT) | 298 | if (p->header.flag & BT_ROOT) |
299 | goto mapend; | 299 | goto mapend; |
300 | 300 | ||
301 | if ((bn = le64_to_cpu(p->header.next)) == 0) | 301 | if ((bn = le64_to_cpu(p->header.next)) == 0) |
302 | goto mapend; | 302 | goto mapend; |
303 | 303 | ||
304 | XT_PUTPAGE(mp); | 304 | XT_PUTPAGE(mp); |
305 | 305 | ||
306 | /* get next sibling page */ | 306 | /* get next sibling page */ |
307 | XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); | 307 | XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); |
308 | if (rc) | 308 | if (rc) |
309 | return rc; | 309 | return rc; |
310 | 310 | ||
311 | index = XTENTRYSTART; | 311 | index = XTENTRYSTART; |
312 | } | 312 | } |
313 | 313 | ||
314 | xad = &p->xad[index]; | 314 | xad = &p->xad[index]; |
315 | 315 | ||
316 | /* | 316 | /* |
317 | * is lxd covered by xad ? | 317 | * is lxd covered by xad ? |
318 | */ | 318 | */ |
319 | compare: | 319 | compare: |
320 | xstart = offsetXAD(xad); | 320 | xstart = offsetXAD(xad); |
321 | xlen = lengthXAD(xad); | 321 | xlen = lengthXAD(xad); |
322 | xend = xstart + xlen; | 322 | xend = xstart + xlen; |
323 | xaddr = addressXAD(xad); | 323 | xaddr = addressXAD(xad); |
324 | 324 | ||
325 | compare1: | 325 | compare1: |
326 | if (xstart < lstart) | 326 | if (xstart < lstart) |
327 | goto compare2; | 327 | goto compare2; |
328 | 328 | ||
329 | /* (lstart <= xstart) */ | 329 | /* (lstart <= xstart) */ |
330 | 330 | ||
331 | /* lxd is NOT covered by xad */ | 331 | /* lxd is NOT covered by xad */ |
332 | if (lend <= xstart) { | 332 | if (lend <= xstart) { |
333 | /* | 333 | /* |
334 | * get next lxd | 334 | * get next lxd |
335 | */ | 335 | */ |
336 | if (--nlxd == 0) | 336 | if (--nlxd == 0) |
337 | goto mapend; | 337 | goto mapend; |
338 | lxd++; | 338 | lxd++; |
339 | 339 | ||
340 | lstart = offsetLXD(lxd); | 340 | lstart = offsetLXD(lxd); |
341 | llen = lengthLXD(lxd); | 341 | llen = lengthLXD(lxd); |
342 | lend = lstart + llen; | 342 | lend = lstart + llen; |
343 | if (lstart >= size) | 343 | if (lstart >= size) |
344 | goto mapend; | 344 | goto mapend; |
345 | 345 | ||
346 | /* compare with the current xad */ | 346 | /* compare with the current xad */ |
347 | goto compare1; | 347 | goto compare1; |
348 | } | 348 | } |
349 | /* lxd is covered by xad */ | 349 | /* lxd is covered by xad */ |
350 | else { /* (xstart < lend) */ | 350 | else { /* (xstart < lend) */ |
351 | 351 | ||
352 | /* initialize new pxd */ | 352 | /* initialize new pxd */ |
353 | pstart = xstart; | 353 | pstart = xstart; |
354 | plen = min(lend - xstart, xlen); | 354 | plen = min(lend - xstart, xlen); |
355 | paddr = xaddr; | 355 | paddr = xaddr; |
356 | 356 | ||
357 | goto cover; | 357 | goto cover; |
358 | } | 358 | } |
359 | 359 | ||
360 | /* (xstart < lstart) */ | 360 | /* (xstart < lstart) */ |
361 | compare2: | 361 | compare2: |
362 | /* lxd is covered by xad */ | 362 | /* lxd is covered by xad */ |
363 | if (lstart < xend) { | 363 | if (lstart < xend) { |
364 | /* initialize new pxd */ | 364 | /* initialize new pxd */ |
365 | pstart = lstart; | 365 | pstart = lstart; |
366 | plen = min(xend - lstart, llen); | 366 | plen = min(xend - lstart, llen); |
367 | paddr = xaddr + (lstart - xstart); | 367 | paddr = xaddr + (lstart - xstart); |
368 | 368 | ||
369 | goto cover; | 369 | goto cover; |
370 | } | 370 | } |
371 | /* lxd is NOT covered by xad */ | 371 | /* lxd is NOT covered by xad */ |
372 | else { /* (xend <= lstart) */ | 372 | else { /* (xend <= lstart) */ |
373 | 373 | ||
374 | /* | 374 | /* |
375 | * get next xad | 375 | * get next xad |
376 | * | 376 | * |
377 | * linear search next xad covering lxd on | 377 | * linear search next xad covering lxd on |
378 | * the current xad page, and then tree search | 378 | * the current xad page, and then tree search |
379 | */ | 379 | */ |
380 | if (index == le16_to_cpu(p->header.nextindex) - 1) { | 380 | if (index == le16_to_cpu(p->header.nextindex) - 1) { |
381 | if (p->header.flag & BT_ROOT) | 381 | if (p->header.flag & BT_ROOT) |
382 | goto mapend; | 382 | goto mapend; |
383 | 383 | ||
384 | XT_PUTPAGE(mp); | 384 | XT_PUTPAGE(mp); |
385 | goto search; | 385 | goto search; |
386 | } else { | 386 | } else { |
387 | index++; | 387 | index++; |
388 | xad++; | 388 | xad++; |
389 | 389 | ||
390 | /* compare with new xad */ | 390 | /* compare with new xad */ |
391 | goto compare; | 391 | goto compare; |
392 | } | 392 | } |
393 | } | 393 | } |
394 | 394 | ||
395 | /* | 395 | /* |
396 | * lxd is covered by xad and a new pxd has been initialized | 396 | * lxd is covered by xad and a new pxd has been initialized |
397 | * (lstart <= xstart < lend) or (xstart < lstart < xend) | 397 | * (lstart <= xstart < lend) or (xstart < lstart < xend) |
398 | */ | 398 | */ |
399 | cover: | 399 | cover: |
400 | /* finalize pxd corresponding to current xad */ | 400 | /* finalize pxd corresponding to current xad */ |
401 | XT_PUTENTRY(pxd, xad->flag, pstart, plen, paddr); | 401 | XT_PUTENTRY(pxd, xad->flag, pstart, plen, paddr); |
402 | 402 | ||
403 | if (++npxd >= maxnpxd) | 403 | if (++npxd >= maxnpxd) |
404 | goto mapend; | 404 | goto mapend; |
405 | pxd++; | 405 | pxd++; |
406 | 406 | ||
407 | /* | 407 | /* |
408 | * lxd is fully covered by xad | 408 | * lxd is fully covered by xad |
409 | */ | 409 | */ |
410 | if (lend <= xend) { | 410 | if (lend <= xend) { |
411 | /* | 411 | /* |
412 | * get next lxd | 412 | * get next lxd |
413 | */ | 413 | */ |
414 | if (--nlxd == 0) | 414 | if (--nlxd == 0) |
415 | goto mapend; | 415 | goto mapend; |
416 | lxd++; | 416 | lxd++; |
417 | 417 | ||
418 | lstart = offsetLXD(lxd); | 418 | lstart = offsetLXD(lxd); |
419 | llen = lengthLXD(lxd); | 419 | llen = lengthLXD(lxd); |
420 | lend = lstart + llen; | 420 | lend = lstart + llen; |
421 | if (lstart >= size) | 421 | if (lstart >= size) |
422 | goto mapend; | 422 | goto mapend; |
423 | 423 | ||
424 | /* | 424 | /* |
425 | * test for old xad covering new lxd | 425 | * test for old xad covering new lxd |
426 | * (old xstart < new lstart) | 426 | * (old xstart < new lstart) |
427 | */ | 427 | */ |
428 | goto compare2; | 428 | goto compare2; |
429 | } | 429 | } |
430 | /* | 430 | /* |
431 | * lxd is partially covered by xad | 431 | * lxd is partially covered by xad |
432 | */ | 432 | */ |
433 | else { /* (xend < lend) */ | 433 | else { /* (xend < lend) */ |
434 | 434 | ||
435 | /* | 435 | /* |
436 | * get next xad | 436 | * get next xad |
437 | * | 437 | * |
438 | * linear search next xad covering lxd on | 438 | * linear search next xad covering lxd on |
439 | * the current xad page, and then next xad page search | 439 | * the current xad page, and then next xad page search |
440 | */ | 440 | */ |
441 | if (index == le16_to_cpu(p->header.nextindex) - 1) { | 441 | if (index == le16_to_cpu(p->header.nextindex) - 1) { |
442 | if (p->header.flag & BT_ROOT) | 442 | if (p->header.flag & BT_ROOT) |
443 | goto mapend; | 443 | goto mapend; |
444 | 444 | ||
445 | if ((bn = le64_to_cpu(p->header.next)) == 0) | 445 | if ((bn = le64_to_cpu(p->header.next)) == 0) |
446 | goto mapend; | 446 | goto mapend; |
447 | 447 | ||
448 | XT_PUTPAGE(mp); | 448 | XT_PUTPAGE(mp); |
449 | 449 | ||
450 | /* get next sibling page */ | 450 | /* get next sibling page */ |
451 | XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); | 451 | XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); |
452 | if (rc) | 452 | if (rc) |
453 | return rc; | 453 | return rc; |
454 | 454 | ||
455 | index = XTENTRYSTART; | 455 | index = XTENTRYSTART; |
456 | xad = &p->xad[index]; | 456 | xad = &p->xad[index]; |
457 | } else { | 457 | } else { |
458 | index++; | 458 | index++; |
459 | xad++; | 459 | xad++; |
460 | } | 460 | } |
461 | 461 | ||
462 | /* | 462 | /* |
463 | * test for new xad covering old lxd | 463 | * test for new xad covering old lxd |
464 | * (old lstart < new xstart) | 464 | * (old lstart < new xstart) |
465 | */ | 465 | */ |
466 | goto compare; | 466 | goto compare; |
467 | } | 467 | } |
468 | 468 | ||
469 | mapend: | 469 | mapend: |
470 | xadlist->nxad = npxd; | 470 | xadlist->nxad = npxd; |
471 | 471 | ||
472 | //out: | 472 | //out: |
473 | XT_PUTPAGE(mp); | 473 | XT_PUTPAGE(mp); |
474 | 474 | ||
475 | return rc; | 475 | return rc; |
476 | } | 476 | } |
477 | 477 | ||
478 | 478 | ||
/*
 *	xtSearch()
 *
 * function:	search for the xad entry covering specified offset.
 *
 * parameters:
 *	ip	- file object;
 *	xoff	- extent offset;
 *	nextp	- address of next extent (if any) for search miss
 *	cmpp	- comparison result:
 *	btstack - traverse stack;
 *	flag	- search process flag (XT_INSERT);
 *
 * returns:
 *	btstack contains (bn, index) of search path traversed to the entry.
 *	*cmpp is set to result of comparison with the entry returned.
 *	the page containing the entry is pinned at exit.
 */
static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp,
		    int *cmpp, struct btstack * btstack, int flag)
{
	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
	int rc = 0;
	int cmp = 1;		/* init for empty page */
	s64 bn;			/* block number */
	struct metapage *mp;	/* page buffer */
	xtpage_t *p;		/* page */
	xad_t *xad;
	int base, index, lim, btindex;
	struct btframe *btsp;
	int nsplit = 0;		/* number of pages to split */
	s64 t64;
	s64 next = 0;		/* offset of next extent, reported on miss */

	INCREMENT(xtStat.search);

	BT_CLR(btstack);

	btstack->nsplit = 0;

	/*
	 * search down tree from root:
	 *
	 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
	 * internal page, child page Pi contains entry with k, Ki <= K < Kj.
	 *
	 * if entry with search key K is not found
	 * internal page search find the entry with largest key Ki
	 * less than K which point to the child page to search;
	 * leaf page search find the entry with smallest key Kj
	 * greater than K so that the returned index is the position of
	 * the entry to be shifted right for insertion of new entry.
	 * for empty tree, search key is greater than any key of the tree.
	 *
	 * by convention, root bn = 0.
	 */
	for (bn = 0;;) {
		/* get/pin the page to search */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;

		/* try sequential access heuristics with the previous
		 * access entry in target leaf page:
		 * once search narrowed down into the target leaf,
		 * key must either match an entry in the leaf or
		 * key entry does not exist in the tree;
		 */
		//fastSearch:
		if ((jfs_ip->btorder & BT_SEQUENTIAL) &&
		    (p->header.flag & BT_LEAF) &&
		    (index = jfs_ip->btindex) <
		    le16_to_cpu(p->header.nextindex)) {
			xad = &p->xad[index];
			t64 = offsetXAD(xad);
			if (xoff < t64 + lengthXAD(xad)) {
				if (xoff >= t64) {
					/* hit: cached entry covers xoff */
					*cmpp = 0;
					goto out;
				}

				/* stop sequential access heuristics */
				goto binarySearch;
			} else {	/* (t64 + lengthXAD(xad)) <= xoff */

				/* try next sequential entry */
				index++;
				if (index <
				    le16_to_cpu(p->header.nextindex)) {
					xad++;
					t64 = offsetXAD(xad);
					if (xoff < t64 + lengthXAD(xad)) {
						if (xoff >= t64) {
							*cmpp = 0;
							goto out;
						}

						/* miss: key falls between
						 * previous and this entry
						 */
						*cmpp = 1;
						next = t64;
						goto out;
					}

					/* (xoff >= t64 + lengthXAD(xad));
					 * matching entry may be further out:
					 * stop heuristic search
					 */
					/* stop sequential access heuristics */
					goto binarySearch;
				}

				/* (index == p->header.nextindex);
				 * miss: key entry does not exist in
				 * the target leaf/tree
				 */
				*cmpp = 1;
				goto out;
			}

			/*
			 * if hit, return index of the entry found, and
			 * if miss, where new entry with search key is
			 * to be inserted;
			 */
		      out:
			/* compute number of pages to split */
			if (flag & XT_INSERT) {
				if (p->header.nextindex ==	/* little-endian */
				    p->header.maxentry)
					nsplit++;
				else
					nsplit = 0;
				btstack->nsplit = nsplit;
			}

			/* save search result: leaf page stays pinned
			 * for the caller (see function header).
			 */
			btsp = btstack->top;
			btsp->bn = bn;
			btsp->index = index;
			btsp->mp = mp;

			/* update sequential access heuristics */
			jfs_ip->btindex = index;

			if (nextp)
				*nextp = next;

			INCREMENT(xtStat.fastSearch);
			return 0;
		}

		/* well, ... full search now */
	      binarySearch:
		lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART;

		/*
		 * binary search with search key K on the current page
		 */
		for (base = XTENTRYSTART; lim; lim >>= 1) {
			index = base + (lim >> 1);

			/* XT_CMP: cmp == 0 when xad[index] covers xoff,
			 * cmp > 0 when xoff lies above this entry
			 * (macro defined elsewhere - semantics inferred
			 * from the branches below).
			 */
			XT_CMP(cmp, xoff, &p->xad[index], t64);
			if (cmp == 0) {
				/*
				 * search hit
				 */
				/* search hit - leaf page:
				 * return the entry found
				 */
				if (p->header.flag & BT_LEAF) {
					*cmpp = cmp;

					/* compute number of pages to split */
					if (flag & XT_INSERT) {
						if (p->header.nextindex ==
						    p->header.maxentry)
							nsplit++;
						else
							nsplit = 0;
						btstack->nsplit = nsplit;
					}

					/* save search result */
					btsp = btstack->top;
					btsp->bn = bn;
					btsp->index = index;
					btsp->mp = mp;

					/* init sequential access heuristics:
					 * a hit at or just past the cached
					 * index marks the access sequential.
					 */
					btindex = jfs_ip->btindex;
					if (index == btindex ||
					    index == btindex + 1)
						jfs_ip->btorder = BT_SEQUENTIAL;
					else
						jfs_ip->btorder = BT_RANDOM;
					jfs_ip->btindex = index;

					return 0;
				}
				/* search hit - internal page:
				 * descend/search its child page
				 */
				if (index < le16_to_cpu(p->header.nextindex)-1)
					next = offsetXAD(&p->xad[index + 1]);
				goto next;
			}

			if (cmp > 0) {
				base = index + 1;
				--lim;
			}
		}

		/*
		 * search miss
		 *
		 * base is the smallest index with key (Kj) greater than
		 * search key (K) and may be zero or maxentry index.
		 */
		if (base < le16_to_cpu(p->header.nextindex))
			next = offsetXAD(&p->xad[base]);
		/*
		 * search miss - leaf page:
		 *
		 * return location of entry (base) where new entry with
		 * search key K is to be inserted.
		 */
		if (p->header.flag & BT_LEAF) {
			*cmpp = cmp;

			/* compute number of pages to split */
			if (flag & XT_INSERT) {
				if (p->header.nextindex ==
				    p->header.maxentry)
					nsplit++;
				else
					nsplit = 0;
				btstack->nsplit = nsplit;
			}

			/* save search result */
			btsp = btstack->top;
			btsp->bn = bn;
			btsp->index = base;
			btsp->mp = mp;

			/* init sequential access heuristics */
			btindex = jfs_ip->btindex;
			if (base == btindex || base == btindex + 1)
				jfs_ip->btorder = BT_SEQUENTIAL;
			else
				jfs_ip->btorder = BT_RANDOM;
			jfs_ip->btindex = base;

			if (nextp)
				*nextp = next;

			return 0;
		}

		/*
		 * search miss - non-leaf page:
		 *
		 * if base is non-zero, decrement base by one to get the parent
		 * entry of the child page to search.
		 */
		index = base ? base - 1 : base;

		/*
		 * go down to child page
		 */
	      next:
		/* update number of pages to split: nsplit accumulates
		 * only along a run of full ancestor pages.
		 */
		if (p->header.nextindex == p->header.maxentry)
			nsplit++;
		else
			nsplit = 0;

		/* push (bn, index) of the parent page/entry */
		if (BT_STACK_FULL(btstack)) {
			jfs_error(ip->i_sb, "stack overrun in xtSearch!");
			XT_PUTPAGE(mp);
			return -EIO;
		}
		BT_PUSH(btstack, bn, index);

		/* get the child page block number */
		bn = addressXAD(&p->xad[index]);

		/* unpin the parent page */
		XT_PUTPAGE(mp);
	}
}
774 | 774 | ||
/*
 *	xtInsert()
 *
 * function:	insert an entry for a new extent at offset xoff,
 *		allocating the data extent if the caller did not.
 *
 * parameter:
 *	tid	- transaction id;
 *	ip	- file object;
 *	xflag	- extent flag (XAD_NOTRECORDED):
 *	xoff	- extent offset;
 *	xlen	- extent length;
 *	xaddrp	- extent address pointer (in/out):
 *		if (*xaddrp)
 *			caller allocated data extent at *xaddrp;
 *		else
 *			allocate data extent and return its xaddr;
 *	flag	-
 *
 * return:
 *	0 on success; -EEXIST if the range already (partially) mapped;
 *	other negative errno from search/allocation/split.
 */
int xtInsert(tid_t tid,		/* transaction id */
	     struct inode *ip, int xflag, s64 xoff, s32 xlen, s64 * xaddrp,
	     int flag)
{
	int rc = 0;
	s64 xaddr, hint;
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* base B+-tree index page */
	s64 bn;
	int index, nextindex;
	struct btstack btstack;	/* traverse stack */
	struct xtsplit split;	/* split information */
	xad_t *xad;
	int cmp;
	s64 next;
	struct tlock *tlck;
	struct xtlock *xtlck;

	jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);

	/*
	 * search for the entry location at which to insert:
	 *
	 * xtFastSearch() and xtSearch() both returns (leaf page
	 * pinned, index at which to insert).
	 * n.b. xtSearch() may return index of maxentry of
	 * the full page.
	 */
	if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
		return rc;

	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	/* This test must follow XT_GETSEARCH since mp must be valid if
	 * we branch to out: */
	/* cmp == 0: xoff already mapped; next != 0 with xlen overlapping
	 * the next extent: the new extent would collide with it.
	 */
	if ((cmp == 0) || (next && (xlen > next - xoff))) {
		rc = -EEXIST;
		goto out;
	}

	/*
	 * allocate data extent requested
	 *
	 * allocation hint: last xad
	 */
	if ((xaddr = *xaddrp) == 0) {
		if (index > XTENTRYSTART) {
			xad = &p->xad[index - 1];
			/* hint: block just past the preceding extent */
			hint = addressXAD(xad) + lengthXAD(xad) - 1;
		} else
			hint = 0;
		/* charge quota first; release it again if dbAlloc fails */
		if ((rc = DQUOT_ALLOC_BLOCK(ip, xlen)))
			goto out;
		if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) {
			DQUOT_FREE_BLOCK(ip, xlen);
			goto out;
		}
	}

	/*
	 * insert entry for new extent
	 */
	xflag |= XAD_NEW;

	/*
	 * if the leaf page is full, split the page and
	 * propagate up the router entry for the new page from split
	 *
	 * The xtSplitUp() will insert the entry and unpin the leaf page.
	 */
	nextindex = le16_to_cpu(p->header.nextindex);
	if (nextindex == le16_to_cpu(p->header.maxentry)) {
		split.mp = mp;
		split.index = index;
		split.flag = xflag;
		split.off = xoff;
		split.len = xlen;
		split.addr = xaddr;
		split.pxdlist = NULL;
		if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
			/* undo data extent allocation (only if we
			 * allocated it above, i.e. *xaddrp was 0)
			 */
			if (*xaddrp == 0) {
				dbFree(ip, xaddr, (s64) xlen);
				DQUOT_FREE_BLOCK(ip, xlen);
			}
			return rc;
		}

		*xaddrp = xaddr;
		return 0;
	}

	/*
	 * insert the new entry into the leaf page
	 */
	/*
	 * acquire a transaction lock on the leaf page;
	 *
	 * action: xad insertion/extension;
	 */
	BT_MARK_DIRTY(mp, ip);

	/* if insert into middle, shift right remaining entries. */
	if (index < nextindex)
		memmove(&p->xad[index + 1], &p->xad[index],
			(nextindex - index) * sizeof(xad_t));

	/* insert the new entry: mark the entry NEW */
	xad = &p->xad[index];
	XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);

	/* advance next available entry index */
	le16_add_cpu(&p->header.nextindex, 1);

	/* Don't log it if there are no links to the file */
	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
		xtlck = (struct xtlock *) & tlck->lock;
		/* widen the logged low-water-mark range to cover the
		 * insertion point through the (new) last entry.
		 */
		xtlck->lwm.offset =
		    (xtlck->lwm.offset) ? min(index,
					      (int)xtlck->lwm.offset) : index;
		xtlck->lwm.length =
		    le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
	}

	*xaddrp = xaddr;

      out:
	/* unpin the leaf page */
	XT_PUTPAGE(mp);

	return rc;
}
930 | 929 | ||
931 | 930 | ||
/*
 *	xtSplitUp()
 *
 * function:
 *	split full pages as propagating insertion up the tree
 *
 * parameter:
 *	tid	- transaction id;
 *	ip	- file object;
 *	split	- entry parameter descriptor;
 *	btstack	- traverse stack from xtSearch()
 *
 * return:
 *	0 on success; negative errno on failure (allocation or page-read
 *	error).  On error all pages pinned by this routine are unpinned.
 */
static int
xtSplitUp(tid_t tid,
	  struct inode *ip, struct xtsplit * split, struct btstack * btstack)
{
	int rc = 0;
	struct metapage *smp;
	xtpage_t *sp;		/* split page */
	struct metapage *rmp;
	s64 rbn;		/* new right page block number */
	struct metapage *rcmp;
	xtpage_t *rcp;		/* right child page */
	s64 rcbn;		/* right child page block number */
	int skip;		/* index of entry of insertion */
	int nextindex;		/* next available entry index of p */
	struct btframe *parent;	/* parent page entry on traverse stack */
	xad_t *xad;
	s64 xaddr;
	int xlen;
	int nsplit;		/* number of pages split */
	struct pxdlist pxdlist;
	pxd_t *pxd;
	struct tlock *tlck;
	struct xtlock *xtlck;

	smp = split->mp;
	sp = XT_PAGE(ip, smp);

	/* is inode xtree root extension/inline EA area free ?
	 *
	 * Fast path: if the split page is the in-inode root and the inline
	 * EA slots are unused, grow the root to XTROOTMAXSLOT in place and
	 * insert there instead of splitting at all.
	 */
	if ((sp->header.flag & BT_ROOT) && (!S_ISDIR(ip->i_mode)) &&
	    (le16_to_cpu(sp->header.maxentry) < XTROOTMAXSLOT) &&
	    (JFS_IP(ip)->mode2 & INLINEEA)) {
		sp->header.maxentry = cpu_to_le16(XTROOTMAXSLOT);
		JFS_IP(ip)->mode2 &= ~INLINEEA;

		BT_MARK_DIRTY(smp, ip);
		/*
		 * acquire a transaction lock on the leaf page;
		 *
		 * action: xad insertion/extension;
		 */

		/* if insert into middle, shift right remaining entries. */
		skip = split->index;
		nextindex = le16_to_cpu(sp->header.nextindex);
		if (skip < nextindex)
			memmove(&sp->xad[skip + 1], &sp->xad[skip],
				(nextindex - skip) * sizeof(xad_t));

		/* insert the new entry: mark the entry NEW */
		xad = &sp->xad[skip];
		XT_PUTENTRY(xad, split->flag, split->off, split->len,
			    split->addr);

		/* advance next available entry index (on-disk LE field) */
		le16_add_cpu(&sp->header.nextindex, 1);

		/* Don't log it if there are no links to the file */
		if (!test_cflag(COMMIT_Nolink, ip)) {
			tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
			xtlck = (struct xtlock *) & tlck->lock;
			xtlck->lwm.offset = (xtlck->lwm.offset) ?
			    min(skip, (int)xtlck->lwm.offset) : skip;
			xtlck->lwm.length =
			    le16_to_cpu(sp->header.nextindex) -
			    xtlck->lwm.offset;
		}

		return 0;
	}

	/*
	 * allocate new index blocks to cover index page split(s)
	 *
	 * allocation hint: ?
	 *
	 * btstack->nsplit is the worst-case number of pages the search
	 * predicted would split; pre-allocate one extent per split so the
	 * split routines below cannot fail on allocation mid-way.
	 */
	if (split->pxdlist == NULL) {
		nsplit = btstack->nsplit;
		split->pxdlist = &pxdlist;
		pxdlist.maxnpxd = pxdlist.npxd = 0;
		pxd = &pxdlist.pxd[0];
		xlen = JFS_SBI(ip->i_sb)->nbperpage;
		for (; nsplit > 0; nsplit--, pxd++) {
			if ((rc = dbAlloc(ip, (s64) 0, (s64) xlen, &xaddr))
			    == 0) {
				PXDaddress(pxd, xaddr);
				PXDlength(pxd, xlen);

				pxdlist.maxnpxd++;

				continue;
			}

			/* undo allocation */
			/* NOTE(review): earlier extents stay recorded in
			 * pxdlist; presumably released by transaction abort
			 * — confirm against callers. */

			XT_PUTPAGE(smp);
			return rc;
		}
	}

	/*
	 * Split leaf page <sp> into <sp> and a new right page <rp>.
	 *
	 * The split routines insert the new entry into the leaf page,
	 * and acquire txLock as appropriate.
	 * return <rp> pinned and its block number <rpbn>.
	 */
	rc = (sp->header.flag & BT_ROOT) ?
	    xtSplitRoot(tid, ip, split, &rmp) :
	    xtSplitPage(tid, ip, split, &rmp, &rbn);

	XT_PUTPAGE(smp);

	if (rc)
		return -EIO;
	/*
	 * propagate up the router entry for the leaf page just split
	 *
	 * insert a router entry for the new page into the parent page,
	 * propagate the insert/split up the tree by walking back the stack
	 * of (bn of parent page, index of child page entry in parent page)
	 * that were traversed during the search for the page that split.
	 *
	 * the propagation of insert/split up the tree stops if the root
	 * splits or the page inserted into doesn't have to split to hold
	 * the new entry.
	 *
	 * the parent entry for the split page remains the same, and
	 * a new entry is inserted at its right with the first key and
	 * block number of the new right page.
	 *
	 * There are a maximum of 3 pages pinned at any time:
	 * right child, left parent and right parent (when the parent splits)
	 * to keep the child page pinned while working on the parent.
	 * make sure that all pins are released at exit.
	 */
	while ((parent = BT_POP(btstack)) != NULL) {
		/* parent page specified by stack frame <parent> */

		/* keep current child pages <rcp> pinned */
		rcmp = rmp;
		rcbn = rbn;
		rcp = XT_PAGE(ip, rcmp);

		/*
		 * insert router entry in parent for new right child page <rp>
		 */
		/* get/pin the parent page <sp> */
		XT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc);
		if (rc) {
			XT_PUTPAGE(rcmp);
			return rc;
		}

		/*
		 * The new key entry goes ONE AFTER the index of parent entry,
		 * because the split was to the right.
		 */
		skip = parent->index + 1;

		/*
		 * split or shift right remaining entries of the parent page
		 */
		nextindex = le16_to_cpu(sp->header.nextindex);
		/*
		 * parent page is full - split the parent page
		 */
		if (nextindex == le16_to_cpu(sp->header.maxentry)) {
			/* init for parent page split: the router entry for
			 * the right child becomes the entry being inserted */
			split->mp = smp;
			split->index = skip;	/* index at insert */
			split->flag = XAD_NEW;
			split->off = offsetXAD(&rcp->xad[XTENTRYSTART]);
			split->len = JFS_SBI(ip->i_sb)->nbperpage;
			split->addr = rcbn;

			/* unpin previous right child page */
			XT_PUTPAGE(rcmp);

			/* The split routines insert the new entry,
			 * and acquire txLock as appropriate.
			 * return <rp> pinned and its block number <rpbn>.
			 */
			rc = (sp->header.flag & BT_ROOT) ?
			    xtSplitRoot(tid, ip, split, &rmp) :
			    xtSplitPage(tid, ip, split, &rmp, &rbn);
			if (rc) {
				XT_PUTPAGE(smp);
				return rc;
			}

			XT_PUTPAGE(smp);
			/* keep new child page <rp> pinned; loop continues
			 * to insert its router entry one level up */
		}
		/*
		 * parent page is not full - insert in parent page
		 */
		else {
			/*
			 * insert router entry in parent for the right child
			 * page from the first entry of the right child page:
			 */
			/*
			 * acquire a transaction lock on the parent page;
			 *
			 * action: router xad insertion;
			 */
			BT_MARK_DIRTY(smp, ip);

			/*
			 * if insert into middle, shift right remaining entries
			 */
			if (skip < nextindex)
				memmove(&sp->xad[skip + 1], &sp->xad[skip],
					(nextindex -
					 skip) << L2XTSLOTSIZE);

			/* insert the router entry */
			xad = &sp->xad[skip];
			XT_PUTENTRY(xad, XAD_NEW,
				    offsetXAD(&rcp->xad[XTENTRYSTART]),
				    JFS_SBI(ip->i_sb)->nbperpage, rcbn);

			/* advance next available entry index (on-disk LE) */
			le16_add_cpu(&sp->header.nextindex, 1);

			/* Don't log it if there are no links to the file */
			if (!test_cflag(COMMIT_Nolink, ip)) {
				tlck = txLock(tid, ip, smp,
					      tlckXTREE | tlckGROW);
				xtlck = (struct xtlock *) & tlck->lock;
				xtlck->lwm.offset = (xtlck->lwm.offset) ?
				    min(skip, (int)xtlck->lwm.offset) : skip;
				xtlck->lwm.length =
				    le16_to_cpu(sp->header.nextindex) -
				    xtlck->lwm.offset;
			}

			/* unpin parent page */
			XT_PUTPAGE(smp);

			/* exit propagate up */
			break;
		}
	}

	/* unpin current right page */
	XT_PUTPAGE(rmp);

	return 0;
}
1199 | 1195 | ||
1200 | 1196 | ||
1201 | /* | 1197 | /* |
1202 | * xtSplitPage() | 1198 | * xtSplitPage() |
1203 | * | 1199 | * |
1204 | * function: | 1200 | * function: |
1205 | * split a full non-root page into | 1201 | * split a full non-root page into |
1206 | * original/split/left page and new right page | 1202 | * original/split/left page and new right page |
1207 | * i.e., the original/split page remains as left page. | 1203 | * i.e., the original/split page remains as left page. |
1208 | * | 1204 | * |
1209 | * parameter: | 1205 | * parameter: |
1210 | * int tid, | 1206 | * int tid, |
1211 | * struct inode *ip, | 1207 | * struct inode *ip, |
1212 | * struct xtsplit *split, | 1208 | * struct xtsplit *split, |
1213 | * struct metapage **rmpp, | 1209 | * struct metapage **rmpp, |
1214 | * u64 *rbnp, | 1210 | * u64 *rbnp, |
1215 | * | 1211 | * |
1216 | * return: | 1212 | * return: |
1217 | * Pointer to page in which to insert or NULL on error. | 1213 | * Pointer to page in which to insert or NULL on error. |
1218 | */ | 1214 | */ |
1219 | static int | 1215 | static int |
1220 | xtSplitPage(tid_t tid, struct inode *ip, | 1216 | xtSplitPage(tid_t tid, struct inode *ip, |
1221 | struct xtsplit * split, struct metapage ** rmpp, s64 * rbnp) | 1217 | struct xtsplit * split, struct metapage ** rmpp, s64 * rbnp) |
1222 | { | 1218 | { |
1223 | int rc = 0; | 1219 | int rc = 0; |
1224 | struct metapage *smp; | 1220 | struct metapage *smp; |
1225 | xtpage_t *sp; | 1221 | xtpage_t *sp; |
1226 | struct metapage *rmp; | 1222 | struct metapage *rmp; |
1227 | xtpage_t *rp; /* new right page allocated */ | 1223 | xtpage_t *rp; /* new right page allocated */ |
1228 | s64 rbn; /* new right page block number */ | 1224 | s64 rbn; /* new right page block number */ |
1229 | struct metapage *mp; | 1225 | struct metapage *mp; |
1230 | xtpage_t *p; | 1226 | xtpage_t *p; |
1231 | s64 nextbn; | 1227 | s64 nextbn; |
1232 | int skip, maxentry, middle, righthalf, n; | 1228 | int skip, maxentry, middle, righthalf, n; |
1233 | xad_t *xad; | 1229 | xad_t *xad; |
1234 | struct pxdlist *pxdlist; | 1230 | struct pxdlist *pxdlist; |
1235 | pxd_t *pxd; | 1231 | pxd_t *pxd; |
1236 | struct tlock *tlck; | 1232 | struct tlock *tlck; |
1237 | struct xtlock *sxtlck = NULL, *rxtlck = NULL; | 1233 | struct xtlock *sxtlck = NULL, *rxtlck = NULL; |
1238 | int quota_allocation = 0; | 1234 | int quota_allocation = 0; |
1239 | 1235 | ||
1240 | smp = split->mp; | 1236 | smp = split->mp; |
1241 | sp = XT_PAGE(ip, smp); | 1237 | sp = XT_PAGE(ip, smp); |
1242 | 1238 | ||
1243 | INCREMENT(xtStat.split); | 1239 | INCREMENT(xtStat.split); |
1244 | 1240 | ||
1245 | pxdlist = split->pxdlist; | 1241 | pxdlist = split->pxdlist; |
1246 | pxd = &pxdlist->pxd[pxdlist->npxd]; | 1242 | pxd = &pxdlist->pxd[pxdlist->npxd]; |
1247 | pxdlist->npxd++; | 1243 | pxdlist->npxd++; |
1248 | rbn = addressPXD(pxd); | 1244 | rbn = addressPXD(pxd); |
1249 | 1245 | ||
1250 | /* Allocate blocks to quota. */ | 1246 | /* Allocate blocks to quota. */ |
1251 | if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) { | 1247 | if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) { |
1252 | rc = -EDQUOT; | 1248 | rc = -EDQUOT; |
1253 | goto clean_up; | 1249 | goto clean_up; |
1254 | } | 1250 | } |
1255 | 1251 | ||
1256 | quota_allocation += lengthPXD(pxd); | 1252 | quota_allocation += lengthPXD(pxd); |
1257 | 1253 | ||
1258 | /* | 1254 | /* |
1259 | * allocate the new right page for the split | 1255 | * allocate the new right page for the split |
1260 | */ | 1256 | */ |
1261 | rmp = get_metapage(ip, rbn, PSIZE, 1); | 1257 | rmp = get_metapage(ip, rbn, PSIZE, 1); |
1262 | if (rmp == NULL) { | 1258 | if (rmp == NULL) { |
1263 | rc = -EIO; | 1259 | rc = -EIO; |
1264 | goto clean_up; | 1260 | goto clean_up; |
1265 | } | 1261 | } |
1266 | 1262 | ||
1267 | jfs_info("xtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp); | 1263 | jfs_info("xtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp); |
1268 | 1264 | ||
1269 | BT_MARK_DIRTY(rmp, ip); | 1265 | BT_MARK_DIRTY(rmp, ip); |
1270 | /* | 1266 | /* |
1271 | * action: new page; | 1267 | * action: new page; |
1272 | */ | 1268 | */ |
1273 | 1269 | ||
1274 | rp = (xtpage_t *) rmp->data; | 1270 | rp = (xtpage_t *) rmp->data; |
1275 | rp->header.self = *pxd; | 1271 | rp->header.self = *pxd; |
1276 | rp->header.flag = sp->header.flag & BT_TYPE; | 1272 | rp->header.flag = sp->header.flag & BT_TYPE; |
1277 | rp->header.maxentry = sp->header.maxentry; /* little-endian */ | 1273 | rp->header.maxentry = sp->header.maxentry; /* little-endian */ |
1278 | rp->header.nextindex = cpu_to_le16(XTENTRYSTART); | 1274 | rp->header.nextindex = cpu_to_le16(XTENTRYSTART); |
1279 | 1275 | ||
1280 | BT_MARK_DIRTY(smp, ip); | 1276 | BT_MARK_DIRTY(smp, ip); |
1281 | /* Don't log it if there are no links to the file */ | 1277 | /* Don't log it if there are no links to the file */ |
1282 | if (!test_cflag(COMMIT_Nolink, ip)) { | 1278 | if (!test_cflag(COMMIT_Nolink, ip)) { |
1283 | /* | 1279 | /* |
1284 | * acquire a transaction lock on the new right page; | 1280 | * acquire a transaction lock on the new right page; |
1285 | */ | 1281 | */ |
1286 | tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW); | 1282 | tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW); |
1287 | rxtlck = (struct xtlock *) & tlck->lock; | 1283 | rxtlck = (struct xtlock *) & tlck->lock; |
1288 | rxtlck->lwm.offset = XTENTRYSTART; | 1284 | rxtlck->lwm.offset = XTENTRYSTART; |
1289 | /* | 1285 | /* |
1290 | * acquire a transaction lock on the split page | 1286 | * acquire a transaction lock on the split page |
1291 | */ | 1287 | */ |
1292 | tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW); | 1288 | tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW); |
1293 | sxtlck = (struct xtlock *) & tlck->lock; | 1289 | sxtlck = (struct xtlock *) & tlck->lock; |
1294 | } | 1290 | } |
1295 | 1291 | ||
1296 | /* | 1292 | /* |
1297 | * initialize/update sibling pointers of <sp> and <rp> | 1293 | * initialize/update sibling pointers of <sp> and <rp> |
1298 | */ | 1294 | */ |
1299 | nextbn = le64_to_cpu(sp->header.next); | 1295 | nextbn = le64_to_cpu(sp->header.next); |
1300 | rp->header.next = cpu_to_le64(nextbn); | 1296 | rp->header.next = cpu_to_le64(nextbn); |
1301 | rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self)); | 1297 | rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self)); |
1302 | sp->header.next = cpu_to_le64(rbn); | 1298 | sp->header.next = cpu_to_le64(rbn); |
1303 | 1299 | ||
1304 | skip = split->index; | 1300 | skip = split->index; |
1305 | 1301 | ||
1306 | /* | 1302 | /* |
1307 | * sequential append at tail (after last entry of last page) | 1303 | * sequential append at tail (after last entry of last page) |
1308 | * | 1304 | * |
1309 | * if splitting the last page on a level because of appending | 1305 | * if splitting the last page on a level because of appending |
1310 | * a entry to it (skip is maxentry), it's likely that the access is | 1306 | * a entry to it (skip is maxentry), it's likely that the access is |
1311 | * sequential. adding an empty page on the side of the level is less | 1307 | * sequential. adding an empty page on the side of the level is less |
1312 | * work and can push the fill factor much higher than normal. | 1308 | * work and can push the fill factor much higher than normal. |
1313 | * if we're wrong it's no big deal - we will do the split the right | 1309 | * if we're wrong it's no big deal - we will do the split the right |
1314 | * way next time. | 1310 | * way next time. |
1315 | * (it may look like it's equally easy to do a similar hack for | 1311 | * (it may look like it's equally easy to do a similar hack for |
1316 | * reverse sorted data, that is, split the tree left, but it's not. | 1312 | * reverse sorted data, that is, split the tree left, but it's not. |
1317 | * Be my guest.) | 1313 | * Be my guest.) |
1318 | */ | 1314 | */ |
1319 | if (nextbn == 0 && skip == le16_to_cpu(sp->header.maxentry)) { | 1315 | if (nextbn == 0 && skip == le16_to_cpu(sp->header.maxentry)) { |
1320 | /* | 1316 | /* |
1321 | * acquire a transaction lock on the new/right page; | 1317 | * acquire a transaction lock on the new/right page; |
1322 | * | 1318 | * |
1323 | * action: xad insertion; | 1319 | * action: xad insertion; |
1324 | */ | 1320 | */ |
1325 | /* insert entry at the first entry of the new right page */ | 1321 | /* insert entry at the first entry of the new right page */ |
1326 | xad = &rp->xad[XTENTRYSTART]; | 1322 | xad = &rp->xad[XTENTRYSTART]; |
1327 | XT_PUTENTRY(xad, split->flag, split->off, split->len, | 1323 | XT_PUTENTRY(xad, split->flag, split->off, split->len, |
1328 | split->addr); | 1324 | split->addr); |
1329 | 1325 | ||
1330 | rp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1); | 1326 | rp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1); |
1331 | 1327 | ||
1332 | if (!test_cflag(COMMIT_Nolink, ip)) { | 1328 | if (!test_cflag(COMMIT_Nolink, ip)) { |
1333 | /* rxtlck->lwm.offset = XTENTRYSTART; */ | 1329 | /* rxtlck->lwm.offset = XTENTRYSTART; */ |
1334 | rxtlck->lwm.length = 1; | 1330 | rxtlck->lwm.length = 1; |
1335 | } | 1331 | } |
1336 | 1332 | ||
1337 | *rmpp = rmp; | 1333 | *rmpp = rmp; |
1338 | *rbnp = rbn; | 1334 | *rbnp = rbn; |
1339 | 1335 | ||
1340 | jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp); | 1336 | jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp); |
1341 | return 0; | 1337 | return 0; |
1342 | } | 1338 | } |
1343 | 1339 | ||
1344 | /* | 1340 | /* |
1345 | * non-sequential insert (at possibly middle page) | 1341 | * non-sequential insert (at possibly middle page) |
1346 | */ | 1342 | */ |
1347 | 1343 | ||
1348 | /* | 1344 | /* |
1349 | * update previous pointer of old next/right page of <sp> | 1345 | * update previous pointer of old next/right page of <sp> |
1350 | */ | 1346 | */ |
1351 | if (nextbn != 0) { | 1347 | if (nextbn != 0) { |
1352 | XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); | 1348 | XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); |
1353 | if (rc) { | 1349 | if (rc) { |
1354 | XT_PUTPAGE(rmp); | 1350 | XT_PUTPAGE(rmp); |
1355 | goto clean_up; | 1351 | goto clean_up; |
1356 | } | 1352 | } |
1357 | 1353 | ||
1358 | BT_MARK_DIRTY(mp, ip); | 1354 | BT_MARK_DIRTY(mp, ip); |
1359 | /* | 1355 | /* |
1360 | * acquire a transaction lock on the next page; | 1356 | * acquire a transaction lock on the next page; |
1361 | * | 1357 | * |
1362 | * action:sibling pointer update; | 1358 | * action:sibling pointer update; |
1363 | */ | 1359 | */ |
1364 | if (!test_cflag(COMMIT_Nolink, ip)) | 1360 | if (!test_cflag(COMMIT_Nolink, ip)) |
1365 | tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK); | 1361 | tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK); |
1366 | 1362 | ||
1367 | p->header.prev = cpu_to_le64(rbn); | 1363 | p->header.prev = cpu_to_le64(rbn); |
1368 | 1364 | ||
1369 | /* sibling page may have been updated previously, or | 1365 | /* sibling page may have been updated previously, or |
1370 | * it may be updated later; | 1366 | * it may be updated later; |
1371 | */ | 1367 | */ |
1372 | 1368 | ||
1373 | XT_PUTPAGE(mp); | 1369 | XT_PUTPAGE(mp); |
1374 | } | 1370 | } |
1375 | 1371 | ||
1376 | /* | 1372 | /* |
1377 | * split the data between the split and new/right pages | 1373 | * split the data between the split and new/right pages |
1378 | */ | 1374 | */ |
1379 | maxentry = le16_to_cpu(sp->header.maxentry); | 1375 | maxentry = le16_to_cpu(sp->header.maxentry); |
1380 | middle = maxentry >> 1; | 1376 | middle = maxentry >> 1; |
1381 | righthalf = maxentry - middle; | 1377 | righthalf = maxentry - middle; |
1382 | 1378 | ||
1383 | /* | 1379 | /* |
1384 | * skip index in old split/left page - insert into left page: | 1380 | * skip index in old split/left page - insert into left page: |
1385 | */ | 1381 | */ |
1386 | if (skip <= middle) { | 1382 | if (skip <= middle) { |
1387 | /* move right half of split page to the new right page */ | 1383 | /* move right half of split page to the new right page */ |
1388 | memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle], | 1384 | memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle], |
1389 | righthalf << L2XTSLOTSIZE); | 1385 | righthalf << L2XTSLOTSIZE); |
1390 | 1386 | ||
1391 | /* shift right tail of left half to make room for new entry */ | 1387 | /* shift right tail of left half to make room for new entry */ |
1392 | if (skip < middle) | 1388 | if (skip < middle) |
1393 | memmove(&sp->xad[skip + 1], &sp->xad[skip], | 1389 | memmove(&sp->xad[skip + 1], &sp->xad[skip], |
1394 | (middle - skip) << L2XTSLOTSIZE); | 1390 | (middle - skip) << L2XTSLOTSIZE); |
1395 | 1391 | ||
1396 | /* insert new entry */ | 1392 | /* insert new entry */ |
1397 | xad = &sp->xad[skip]; | 1393 | xad = &sp->xad[skip]; |
1398 | XT_PUTENTRY(xad, split->flag, split->off, split->len, | 1394 | XT_PUTENTRY(xad, split->flag, split->off, split->len, |
1399 | split->addr); | 1395 | split->addr); |
1400 | 1396 | ||
1401 | /* update page header */ | 1397 | /* update page header */ |
1402 | sp->header.nextindex = cpu_to_le16(middle + 1); | 1398 | sp->header.nextindex = cpu_to_le16(middle + 1); |
1403 | if (!test_cflag(COMMIT_Nolink, ip)) { | 1399 | if (!test_cflag(COMMIT_Nolink, ip)) { |
1404 | sxtlck->lwm.offset = (sxtlck->lwm.offset) ? | 1400 | sxtlck->lwm.offset = (sxtlck->lwm.offset) ? |
1405 | min(skip, (int)sxtlck->lwm.offset) : skip; | 1401 | min(skip, (int)sxtlck->lwm.offset) : skip; |
1406 | } | 1402 | } |
1407 | 1403 | ||
1408 | rp->header.nextindex = | 1404 | rp->header.nextindex = |
1409 | cpu_to_le16(XTENTRYSTART + righthalf); | 1405 | cpu_to_le16(XTENTRYSTART + righthalf); |
1410 | } | 1406 | } |
1411 | /* | 1407 | /* |
1412 | * skip index in new right page - insert into right page: | 1408 | * skip index in new right page - insert into right page: |
1413 | */ | 1409 | */ |
1414 | else { | 1410 | else { |
1415 | /* move left head of right half to right page */ | 1411 | /* move left head of right half to right page */ |
1416 | n = skip - middle; | 1412 | n = skip - middle; |
1417 | memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle], | 1413 | memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle], |
1418 | n << L2XTSLOTSIZE); | 1414 | n << L2XTSLOTSIZE); |
1419 | 1415 | ||
1420 | /* insert new entry */ | 1416 | /* insert new entry */ |
1421 | n += XTENTRYSTART; | 1417 | n += XTENTRYSTART; |
1422 | xad = &rp->xad[n]; | 1418 | xad = &rp->xad[n]; |
1423 | XT_PUTENTRY(xad, split->flag, split->off, split->len, | 1419 | XT_PUTENTRY(xad, split->flag, split->off, split->len, |
1424 | split->addr); | 1420 | split->addr); |
1425 | 1421 | ||
1426 | /* move right tail of right half to right page */ | 1422 | /* move right tail of right half to right page */ |
1427 | if (skip < maxentry) | 1423 | if (skip < maxentry) |
1428 | memmove(&rp->xad[n + 1], &sp->xad[skip], | 1424 | memmove(&rp->xad[n + 1], &sp->xad[skip], |
1429 | (maxentry - skip) << L2XTSLOTSIZE); | 1425 | (maxentry - skip) << L2XTSLOTSIZE); |
1430 | 1426 | ||
1431 | /* update page header */ | 1427 | /* update page header */ |
1432 | sp->header.nextindex = cpu_to_le16(middle); | 1428 | sp->header.nextindex = cpu_to_le16(middle); |
1433 | if (!test_cflag(COMMIT_Nolink, ip)) { | 1429 | if (!test_cflag(COMMIT_Nolink, ip)) { |
1434 | sxtlck->lwm.offset = (sxtlck->lwm.offset) ? | 1430 | sxtlck->lwm.offset = (sxtlck->lwm.offset) ? |
1435 | min(middle, (int)sxtlck->lwm.offset) : middle; | 1431 | min(middle, (int)sxtlck->lwm.offset) : middle; |
1436 | } | 1432 | } |
1437 | 1433 | ||
1438 | rp->header.nextindex = cpu_to_le16(XTENTRYSTART + | 1434 | rp->header.nextindex = cpu_to_le16(XTENTRYSTART + |
1439 | righthalf + 1); | 1435 | righthalf + 1); |
1440 | } | 1436 | } |
1441 | 1437 | ||
1442 | if (!test_cflag(COMMIT_Nolink, ip)) { | 1438 | if (!test_cflag(COMMIT_Nolink, ip)) { |
1443 | sxtlck->lwm.length = le16_to_cpu(sp->header.nextindex) - | 1439 | sxtlck->lwm.length = le16_to_cpu(sp->header.nextindex) - |
1444 | sxtlck->lwm.offset; | 1440 | sxtlck->lwm.offset; |
1445 | 1441 | ||
1446 | /* rxtlck->lwm.offset = XTENTRYSTART; */ | 1442 | /* rxtlck->lwm.offset = XTENTRYSTART; */ |
1447 | rxtlck->lwm.length = le16_to_cpu(rp->header.nextindex) - | 1443 | rxtlck->lwm.length = le16_to_cpu(rp->header.nextindex) - |
1448 | XTENTRYSTART; | 1444 | XTENTRYSTART; |
1449 | } | 1445 | } |
1450 | 1446 | ||
1451 | *rmpp = rmp; | 1447 | *rmpp = rmp; |
1452 | *rbnp = rbn; | 1448 | *rbnp = rbn; |
1453 | 1449 | ||
1454 | jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp); | 1450 | jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp); |
1455 | return rc; | 1451 | return rc; |
1456 | 1452 | ||
1457 | clean_up: | 1453 | clean_up: |
1458 | 1454 | ||
1459 | /* Rollback quota allocation. */ | 1455 | /* Rollback quota allocation. */ |
1460 | if (quota_allocation) | 1456 | if (quota_allocation) |
1461 | DQUOT_FREE_BLOCK(ip, quota_allocation); | 1457 | DQUOT_FREE_BLOCK(ip, quota_allocation); |
1462 | 1458 | ||
1463 | return (rc); | 1459 | return (rc); |
1464 | } | 1460 | } |
1465 | 1461 | ||
1466 | 1462 | ||
1467 | /* | 1463 | /* |
1468 | * xtSplitRoot() | 1464 | * xtSplitRoot() |
1469 | * | 1465 | * |
1470 | * function: | 1466 | * function: |
1471 | * split the full root page into original/root/split page and new | 1467 | * split the full root page into original/root/split page and new |
1472 | * right page | 1468 | * right page |
1473 | * i.e., root remains fixed in tree anchor (inode) and the root is | 1469 | * i.e., root remains fixed in tree anchor (inode) and the root is |
1474 | * copied to a single new right child page since root page << | 1470 | * copied to a single new right child page since root page << |
1475 | * non-root page, and the split root page contains a single entry | 1471 | * non-root page, and the split root page contains a single entry |
1476 | * for the new right child page. | 1472 | * for the new right child page. |
1477 | * | 1473 | * |
1478 | * parameter: | 1474 | * parameter: |
1479 | * int tid, | 1475 | * int tid, |
1480 | * struct inode *ip, | 1476 | * struct inode *ip, |
1481 | * struct xtsplit *split, | 1477 | * struct xtsplit *split, |
1482 | * struct metapage **rmpp) | 1478 | * struct metapage **rmpp) |
1483 | * | 1479 | * |
1484 | * return: | 1480 | * return: |
1485 | * Pointer to page in which to insert or NULL on error. | 1481 | * Pointer to page in which to insert or NULL on error. |
1486 | */ | 1482 | */ |
1487 | static int | 1483 | static int |
1488 | xtSplitRoot(tid_t tid, | 1484 | xtSplitRoot(tid_t tid, |
1489 | struct inode *ip, struct xtsplit * split, struct metapage ** rmpp) | 1485 | struct inode *ip, struct xtsplit * split, struct metapage ** rmpp) |
1490 | { | 1486 | { |
1491 | xtpage_t *sp; | 1487 | xtpage_t *sp; |
1492 | struct metapage *rmp; | 1488 | struct metapage *rmp; |
1493 | xtpage_t *rp; | 1489 | xtpage_t *rp; |
1494 | s64 rbn; | 1490 | s64 rbn; |
1495 | int skip, nextindex; | 1491 | int skip, nextindex; |
1496 | xad_t *xad; | 1492 | xad_t *xad; |
1497 | pxd_t *pxd; | 1493 | pxd_t *pxd; |
1498 | struct pxdlist *pxdlist; | 1494 | struct pxdlist *pxdlist; |
1499 | struct tlock *tlck; | 1495 | struct tlock *tlck; |
1500 | struct xtlock *xtlck; | 1496 | struct xtlock *xtlck; |
1501 | 1497 | ||
1502 | sp = &JFS_IP(ip)->i_xtroot; | 1498 | sp = &JFS_IP(ip)->i_xtroot; |
1503 | 1499 | ||
1504 | INCREMENT(xtStat.split); | 1500 | INCREMENT(xtStat.split); |
1505 | 1501 | ||
1506 | /* | 1502 | /* |
1507 | * allocate a single (right) child page | 1503 | * allocate a single (right) child page |
1508 | */ | 1504 | */ |
1509 | pxdlist = split->pxdlist; | 1505 | pxdlist = split->pxdlist; |
1510 | pxd = &pxdlist->pxd[pxdlist->npxd]; | 1506 | pxd = &pxdlist->pxd[pxdlist->npxd]; |
1511 | pxdlist->npxd++; | 1507 | pxdlist->npxd++; |
1512 | rbn = addressPXD(pxd); | 1508 | rbn = addressPXD(pxd); |
1513 | rmp = get_metapage(ip, rbn, PSIZE, 1); | 1509 | rmp = get_metapage(ip, rbn, PSIZE, 1); |
1514 | if (rmp == NULL) | 1510 | if (rmp == NULL) |
1515 | return -EIO; | 1511 | return -EIO; |
1516 | 1512 | ||
1517 | /* Allocate blocks to quota. */ | 1513 | /* Allocate blocks to quota. */ |
1518 | if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) { | 1514 | if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) { |
1519 | release_metapage(rmp); | 1515 | release_metapage(rmp); |
1520 | return -EDQUOT; | 1516 | return -EDQUOT; |
1521 | } | 1517 | } |
1522 | 1518 | ||
1523 | jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp); | 1519 | jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp); |
1524 | 1520 | ||
1525 | /* | 1521 | /* |
1526 | * acquire a transaction lock on the new right page; | 1522 | * acquire a transaction lock on the new right page; |
1527 | * | 1523 | * |
1528 | * action: new page; | 1524 | * action: new page; |
1529 | */ | 1525 | */ |
1530 | BT_MARK_DIRTY(rmp, ip); | 1526 | BT_MARK_DIRTY(rmp, ip); |
1531 | 1527 | ||
1532 | rp = (xtpage_t *) rmp->data; | 1528 | rp = (xtpage_t *) rmp->data; |
1533 | rp->header.flag = | 1529 | rp->header.flag = |
1534 | (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL; | 1530 | (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL; |
1535 | rp->header.self = *pxd; | 1531 | rp->header.self = *pxd; |
1536 | rp->header.nextindex = cpu_to_le16(XTENTRYSTART); | 1532 | rp->header.nextindex = cpu_to_le16(XTENTRYSTART); |
1537 | rp->header.maxentry = cpu_to_le16(PSIZE >> L2XTSLOTSIZE); | 1533 | rp->header.maxentry = cpu_to_le16(PSIZE >> L2XTSLOTSIZE); |
1538 | 1534 | ||
1539 | /* initialize sibling pointers */ | 1535 | /* initialize sibling pointers */ |
1540 | rp->header.next = 0; | 1536 | rp->header.next = 0; |
1541 | rp->header.prev = 0; | 1537 | rp->header.prev = 0; |
1542 | 1538 | ||
1543 | /* | 1539 | /* |
1544 | * copy the in-line root page into new right page extent | 1540 | * copy the in-line root page into new right page extent |
1545 | */ | 1541 | */ |
1546 | nextindex = le16_to_cpu(sp->header.maxentry); | 1542 | nextindex = le16_to_cpu(sp->header.maxentry); |
1547 | memmove(&rp->xad[XTENTRYSTART], &sp->xad[XTENTRYSTART], | 1543 | memmove(&rp->xad[XTENTRYSTART], &sp->xad[XTENTRYSTART], |
1548 | (nextindex - XTENTRYSTART) << L2XTSLOTSIZE); | 1544 | (nextindex - XTENTRYSTART) << L2XTSLOTSIZE); |
1549 | 1545 | ||
1550 | /* | 1546 | /* |
1551 | * insert the new entry into the new right/child page | 1547 | * insert the new entry into the new right/child page |
1552 | * (skip index in the new right page will not change) | 1548 | * (skip index in the new right page will not change) |
1553 | */ | 1549 | */ |
1554 | skip = split->index; | 1550 | skip = split->index; |
1555 | /* if insert into middle, shift right remaining entries */ | 1551 | /* if insert into middle, shift right remaining entries */ |
1556 | if (skip != nextindex) | 1552 | if (skip != nextindex) |
1557 | memmove(&rp->xad[skip + 1], &rp->xad[skip], | 1553 | memmove(&rp->xad[skip + 1], &rp->xad[skip], |
1558 | (nextindex - skip) * sizeof(xad_t)); | 1554 | (nextindex - skip) * sizeof(xad_t)); |
1559 | 1555 | ||
1560 | xad = &rp->xad[skip]; | 1556 | xad = &rp->xad[skip]; |
1561 | XT_PUTENTRY(xad, split->flag, split->off, split->len, split->addr); | 1557 | XT_PUTENTRY(xad, split->flag, split->off, split->len, split->addr); |
1562 | 1558 | ||
1563 | /* update page header */ | 1559 | /* update page header */ |
1564 | rp->header.nextindex = cpu_to_le16(nextindex + 1); | 1560 | rp->header.nextindex = cpu_to_le16(nextindex + 1); |
1565 | 1561 | ||
1566 | if (!test_cflag(COMMIT_Nolink, ip)) { | 1562 | if (!test_cflag(COMMIT_Nolink, ip)) { |
1567 | tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW); | 1563 | tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW); |
1568 | xtlck = (struct xtlock *) & tlck->lock; | 1564 | xtlck = (struct xtlock *) & tlck->lock; |
1569 | xtlck->lwm.offset = XTENTRYSTART; | 1565 | xtlck->lwm.offset = XTENTRYSTART; |
1570 | xtlck->lwm.length = le16_to_cpu(rp->header.nextindex) - | 1566 | xtlck->lwm.length = le16_to_cpu(rp->header.nextindex) - |
1571 | XTENTRYSTART; | 1567 | XTENTRYSTART; |
1572 | } | 1568 | } |
1573 | 1569 | ||
1574 | /* | 1570 | /* |
1575 | * reset the root | 1571 | * reset the root |
1576 | * | 1572 | * |
1577 | * init root with the single entry for the new right page | 1573 | * init root with the single entry for the new right page |
1578 | * set the 1st entry offset to 0, which force the left-most key | 1574 | * set the 1st entry offset to 0, which force the left-most key |
1579 | * at any level of the tree to be less than any search key. | 1575 | * at any level of the tree to be less than any search key. |
1580 | */ | 1576 | */ |
1581 | /* | 1577 | /* |
1582 | * acquire a transaction lock on the root page (in-memory inode); | 1578 | * acquire a transaction lock on the root page (in-memory inode); |
1583 | * | 1579 | * |
1584 | * action: root split; | 1580 | * action: root split; |
1585 | */ | 1581 | */ |
1586 | BT_MARK_DIRTY(split->mp, ip); | 1582 | BT_MARK_DIRTY(split->mp, ip); |
1587 | 1583 | ||
1588 | xad = &sp->xad[XTENTRYSTART]; | 1584 | xad = &sp->xad[XTENTRYSTART]; |
1589 | XT_PUTENTRY(xad, XAD_NEW, 0, JFS_SBI(ip->i_sb)->nbperpage, rbn); | 1585 | XT_PUTENTRY(xad, XAD_NEW, 0, JFS_SBI(ip->i_sb)->nbperpage, rbn); |
1590 | 1586 | ||
1591 | /* update page header of root */ | 1587 | /* update page header of root */ |
1592 | sp->header.flag &= ~BT_LEAF; | 1588 | sp->header.flag &= ~BT_LEAF; |
1593 | sp->header.flag |= BT_INTERNAL; | 1589 | sp->header.flag |= BT_INTERNAL; |
1594 | 1590 | ||
1595 | sp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1); | 1591 | sp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1); |
1596 | 1592 | ||
1597 | if (!test_cflag(COMMIT_Nolink, ip)) { | 1593 | if (!test_cflag(COMMIT_Nolink, ip)) { |
1598 | tlck = txLock(tid, ip, split->mp, tlckXTREE | tlckGROW); | 1594 | tlck = txLock(tid, ip, split->mp, tlckXTREE | tlckGROW); |
1599 | xtlck = (struct xtlock *) & tlck->lock; | 1595 | xtlck = (struct xtlock *) & tlck->lock; |
1600 | xtlck->lwm.offset = XTENTRYSTART; | 1596 | xtlck->lwm.offset = XTENTRYSTART; |
1601 | xtlck->lwm.length = 1; | 1597 | xtlck->lwm.length = 1; |
1602 | } | 1598 | } |
1603 | 1599 | ||
1604 | *rmpp = rmp; | 1600 | *rmpp = rmp; |
1605 | 1601 | ||
1606 | jfs_info("xtSplitRoot: sp:0x%p rp:0x%p", sp, rp); | 1602 | jfs_info("xtSplitRoot: sp:0x%p rp:0x%p", sp, rp); |
1607 | return 0; | 1603 | return 0; |
1608 | } | 1604 | } |
1609 | 1605 | ||
1610 | 1606 | ||
1611 | /* | 1607 | /* |
1612 | * xtExtend() | 1608 | * xtExtend() |
1613 | * | 1609 | * |
1614 | * function: extend in-place; | 1610 | * function: extend in-place; |
1615 | * | 1611 | * |
1616 | * note: existing extent may or may not have been committed. | 1612 | * note: existing extent may or may not have been committed. |
1617 | * caller is responsible for pager buffer cache update, and | 1613 | * caller is responsible for pager buffer cache update, and |
1618 | * working block allocation map update; | 1614 | * working block allocation map update; |
1619 | * update pmap: alloc whole extended extent; | 1615 | * update pmap: alloc whole extended extent; |
1620 | */ | 1616 | */ |
1621 | int xtExtend(tid_t tid, /* transaction id */ | 1617 | int xtExtend(tid_t tid, /* transaction id */ |
1622 | struct inode *ip, s64 xoff, /* delta extent offset */ | 1618 | struct inode *ip, s64 xoff, /* delta extent offset */ |
1623 | s32 xlen, /* delta extent length */ | 1619 | s32 xlen, /* delta extent length */ |
1624 | int flag) | 1620 | int flag) |
1625 | { | 1621 | { |
1626 | int rc = 0; | 1622 | int rc = 0; |
1627 | int cmp; | 1623 | int cmp; |
1628 | struct metapage *mp; /* meta-page buffer */ | 1624 | struct metapage *mp; /* meta-page buffer */ |
1629 | xtpage_t *p; /* base B+-tree index page */ | 1625 | xtpage_t *p; /* base B+-tree index page */ |
1630 | s64 bn; | 1626 | s64 bn; |
1631 | int index, nextindex, len; | 1627 | int index, nextindex, len; |
1632 | struct btstack btstack; /* traverse stack */ | 1628 | struct btstack btstack; /* traverse stack */ |
1633 | struct xtsplit split; /* split information */ | 1629 | struct xtsplit split; /* split information */ |
1634 | xad_t *xad; | 1630 | xad_t *xad; |
1635 | s64 xaddr; | 1631 | s64 xaddr; |
1636 | struct tlock *tlck; | 1632 | struct tlock *tlck; |
1637 | struct xtlock *xtlck = NULL; | 1633 | struct xtlock *xtlck = NULL; |
1638 | 1634 | ||
1639 | jfs_info("xtExtend: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen); | 1635 | jfs_info("xtExtend: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen); |
1640 | 1636 | ||
1641 | /* there must exist extent to be extended */ | 1637 | /* there must exist extent to be extended */ |
1642 | if ((rc = xtSearch(ip, xoff - 1, NULL, &cmp, &btstack, XT_INSERT))) | 1638 | if ((rc = xtSearch(ip, xoff - 1, NULL, &cmp, &btstack, XT_INSERT))) |
1643 | return rc; | 1639 | return rc; |
1644 | 1640 | ||
1645 | /* retrieve search result */ | 1641 | /* retrieve search result */ |
1646 | XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); | 1642 | XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); |
1647 | 1643 | ||
1648 | if (cmp != 0) { | 1644 | if (cmp != 0) { |
1649 | XT_PUTPAGE(mp); | 1645 | XT_PUTPAGE(mp); |
1650 | jfs_error(ip->i_sb, "xtExtend: xtSearch did not find extent"); | 1646 | jfs_error(ip->i_sb, "xtExtend: xtSearch did not find extent"); |
1651 | return -EIO; | 1647 | return -EIO; |
1652 | } | 1648 | } |
1653 | 1649 | ||
1654 | /* extension must be contiguous */ | 1650 | /* extension must be contiguous */ |
1655 | xad = &p->xad[index]; | 1651 | xad = &p->xad[index]; |
1656 | if ((offsetXAD(xad) + lengthXAD(xad)) != xoff) { | 1652 | if ((offsetXAD(xad) + lengthXAD(xad)) != xoff) { |
1657 | XT_PUTPAGE(mp); | 1653 | XT_PUTPAGE(mp); |
1658 | jfs_error(ip->i_sb, "xtExtend: extension is not contiguous"); | 1654 | jfs_error(ip->i_sb, "xtExtend: extension is not contiguous"); |
1659 | return -EIO; | 1655 | return -EIO; |
1660 | } | 1656 | } |
1661 | 1657 | ||
1662 | /* | 1658 | /* |
1663 | * acquire a transaction lock on the leaf page; | 1659 | * acquire a transaction lock on the leaf page; |
1664 | * | 1660 | * |
1665 | * action: xad insertion/extension; | 1661 | * action: xad insertion/extension; |
1666 | */ | 1662 | */ |
1667 | BT_MARK_DIRTY(mp, ip); | 1663 | BT_MARK_DIRTY(mp, ip); |
1668 | if (!test_cflag(COMMIT_Nolink, ip)) { | 1664 | if (!test_cflag(COMMIT_Nolink, ip)) { |
1669 | tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); | 1665 | tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); |
1670 | xtlck = (struct xtlock *) & tlck->lock; | 1666 | xtlck = (struct xtlock *) & tlck->lock; |
1671 | } | 1667 | } |
1672 | 1668 | ||
1673 | /* extend will overflow extent ? */ | 1669 | /* extend will overflow extent ? */ |
1674 | xlen = lengthXAD(xad) + xlen; | 1670 | xlen = lengthXAD(xad) + xlen; |
1675 | if ((len = xlen - MAXXLEN) <= 0) | 1671 | if ((len = xlen - MAXXLEN) <= 0) |
1676 | goto extendOld; | 1672 | goto extendOld; |
1677 | 1673 | ||
1678 | /* | 1674 | /* |
1679 | * extent overflow: insert entry for new extent | 1675 | * extent overflow: insert entry for new extent |
1680 | */ | 1676 | */ |
1681 | //insertNew: | 1677 | //insertNew: |
1682 | xoff = offsetXAD(xad) + MAXXLEN; | 1678 | xoff = offsetXAD(xad) + MAXXLEN; |
1683 | xaddr = addressXAD(xad) + MAXXLEN; | 1679 | xaddr = addressXAD(xad) + MAXXLEN; |
1684 | nextindex = le16_to_cpu(p->header.nextindex); | 1680 | nextindex = le16_to_cpu(p->header.nextindex); |
1685 | 1681 | ||
1686 | /* | 1682 | /* |
1687 | * if the leaf page is full, insert the new entry and | 1683 | * if the leaf page is full, insert the new entry and |
1688 | * propagate up the router entry for the new page from split | 1684 | * propagate up the router entry for the new page from split |
1689 | * | 1685 | * |
1690 | * The xtSplitUp() will insert the entry and unpin the leaf page. | 1686 | * The xtSplitUp() will insert the entry and unpin the leaf page. |
1691 | */ | 1687 | */ |
1692 | if (nextindex == le16_to_cpu(p->header.maxentry)) { | 1688 | if (nextindex == le16_to_cpu(p->header.maxentry)) { |
1693 | /* xtSpliUp() unpins leaf pages */ | 1689 | /* xtSpliUp() unpins leaf pages */ |
1694 | split.mp = mp; | 1690 | split.mp = mp; |
1695 | split.index = index + 1; | 1691 | split.index = index + 1; |
1696 | split.flag = XAD_NEW; | 1692 | split.flag = XAD_NEW; |
1697 | split.off = xoff; /* split offset */ | 1693 | split.off = xoff; /* split offset */ |
1698 | split.len = len; | 1694 | split.len = len; |
1699 | split.addr = xaddr; | 1695 | split.addr = xaddr; |
1700 | split.pxdlist = NULL; | 1696 | split.pxdlist = NULL; |
1701 | if ((rc = xtSplitUp(tid, ip, &split, &btstack))) | 1697 | if ((rc = xtSplitUp(tid, ip, &split, &btstack))) |
1702 | return rc; | 1698 | return rc; |
1703 | 1699 | ||
1704 | /* get back old page */ | 1700 | /* get back old page */ |
1705 | XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); | 1701 | XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); |
1706 | if (rc) | 1702 | if (rc) |
1707 | return rc; | 1703 | return rc; |
1708 | /* | 1704 | /* |
1709 | * if leaf root has been split, original root has been | 1705 | * if leaf root has been split, original root has been |
1710 | * copied to new child page, i.e., original entry now | 1706 | * copied to new child page, i.e., original entry now |
1711 | * resides on the new child page; | 1707 | * resides on the new child page; |
1712 | */ | 1708 | */ |
1713 | if (p->header.flag & BT_INTERNAL) { | 1709 | if (p->header.flag & BT_INTERNAL) { |
1714 | ASSERT(p->header.nextindex == | 1710 | ASSERT(p->header.nextindex == |
1715 | cpu_to_le16(XTENTRYSTART + 1)); | 1711 | cpu_to_le16(XTENTRYSTART + 1)); |
1716 | xad = &p->xad[XTENTRYSTART]; | 1712 | xad = &p->xad[XTENTRYSTART]; |
1717 | bn = addressXAD(xad); | 1713 | bn = addressXAD(xad); |
1718 | XT_PUTPAGE(mp); | 1714 | XT_PUTPAGE(mp); |
1719 | 1715 | ||
1720 | /* get new child page */ | 1716 | /* get new child page */ |
1721 | XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); | 1717 | XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); |
1722 | if (rc) | 1718 | if (rc) |
1723 | return rc; | 1719 | return rc; |
1724 | 1720 | ||
1725 | BT_MARK_DIRTY(mp, ip); | 1721 | BT_MARK_DIRTY(mp, ip); |
1726 | if (!test_cflag(COMMIT_Nolink, ip)) { | 1722 | if (!test_cflag(COMMIT_Nolink, ip)) { |
1727 | tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); | 1723 | tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); |
1728 | xtlck = (struct xtlock *) & tlck->lock; | 1724 | xtlck = (struct xtlock *) & tlck->lock; |
1729 | } | 1725 | } |
1730 | } | 1726 | } |
1731 | } | 1727 | } |
1732 | /* | 1728 | /* |
1733 | * insert the new entry into the leaf page | 1729 | * insert the new entry into the leaf page |
1734 | */ | 1730 | */ |
1735 | else { | 1731 | else { |
1736 | /* insert the new entry: mark the entry NEW */ | 1732 | /* insert the new entry: mark the entry NEW */ |
1737 | xad = &p->xad[index + 1]; | 1733 | xad = &p->xad[index + 1]; |
1738 | XT_PUTENTRY(xad, XAD_NEW, xoff, len, xaddr); | 1734 | XT_PUTENTRY(xad, XAD_NEW, xoff, len, xaddr); |
1739 | 1735 | ||
1740 | /* advance next available entry index */ | 1736 | /* advance next available entry index */ |
1741 | p->header.nextindex = | 1737 | le16_add_cpu(&p->header.nextindex, 1); |
1742 | cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); | ||
1743 | } | 1738 | } |
1744 | 1739 | ||
1745 | /* get back old entry */ | 1740 | /* get back old entry */ |
1746 | xad = &p->xad[index]; | 1741 | xad = &p->xad[index]; |
1747 | xlen = MAXXLEN; | 1742 | xlen = MAXXLEN; |
1748 | 1743 | ||
1749 | /* | 1744 | /* |
1750 | * extend old extent | 1745 | * extend old extent |
1751 | */ | 1746 | */ |
1752 | extendOld: | 1747 | extendOld: |
1753 | XADlength(xad, xlen); | 1748 | XADlength(xad, xlen); |
1754 | if (!(xad->flag & XAD_NEW)) | 1749 | if (!(xad->flag & XAD_NEW)) |
1755 | xad->flag |= XAD_EXTENDED; | 1750 | xad->flag |= XAD_EXTENDED; |
1756 | 1751 | ||
1757 | if (!test_cflag(COMMIT_Nolink, ip)) { | 1752 | if (!test_cflag(COMMIT_Nolink, ip)) { |
1758 | xtlck->lwm.offset = | 1753 | xtlck->lwm.offset = |
1759 | (xtlck->lwm.offset) ? min(index, | 1754 | (xtlck->lwm.offset) ? min(index, |
1760 | (int)xtlck->lwm.offset) : index; | 1755 | (int)xtlck->lwm.offset) : index; |
1761 | xtlck->lwm.length = | 1756 | xtlck->lwm.length = |
1762 | le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset; | 1757 | le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset; |
1763 | } | 1758 | } |
1764 | 1759 | ||
1765 | /* unpin the leaf page */ | 1760 | /* unpin the leaf page */ |
1766 | XT_PUTPAGE(mp); | 1761 | XT_PUTPAGE(mp); |
1767 | 1762 | ||
1768 | return rc; | 1763 | return rc; |
1769 | } | 1764 | } |
1770 | 1765 | ||
1771 | #ifdef _NOTYET | 1766 | #ifdef _NOTYET |
1772 | /* | 1767 | /* |
1773 | * xtTailgate() | 1768 | * xtTailgate() |
1774 | * | 1769 | * |
1775 | * function: split existing 'tail' extent | 1770 | * function: split existing 'tail' extent |
1776 | * (split offset >= start offset of tail extent), and | 1771 | * (split offset >= start offset of tail extent), and |
1777 | * relocate and extend the split tail half; | 1772 | * relocate and extend the split tail half; |
1778 | * | 1773 | * |
1779 | * note: existing extent may or may not have been committed. | 1774 | * note: existing extent may or may not have been committed. |
1780 | * caller is responsible for pager buffer cache update, and | 1775 | * caller is responsible for pager buffer cache update, and |
1781 | * working block allocation map update; | 1776 | * working block allocation map update; |
1782 | * update pmap: free old split tail extent, alloc new extent; | 1777 | * update pmap: free old split tail extent, alloc new extent; |
1783 | */ | 1778 | */ |
1784 | int xtTailgate(tid_t tid, /* transaction id */ | 1779 | int xtTailgate(tid_t tid, /* transaction id */ |
1785 | struct inode *ip, s64 xoff, /* split/new extent offset */ | 1780 | struct inode *ip, s64 xoff, /* split/new extent offset */ |
1786 | s32 xlen, /* new extent length */ | 1781 | s32 xlen, /* new extent length */ |
1787 | s64 xaddr, /* new extent address */ | 1782 | s64 xaddr, /* new extent address */ |
1788 | int flag) | 1783 | int flag) |
1789 | { | 1784 | { |
1790 | int rc = 0; | 1785 | int rc = 0; |
1791 | int cmp; | 1786 | int cmp; |
1792 | struct metapage *mp; /* meta-page buffer */ | 1787 | struct metapage *mp; /* meta-page buffer */ |
1793 | xtpage_t *p; /* base B+-tree index page */ | 1788 | xtpage_t *p; /* base B+-tree index page */ |
1794 | s64 bn; | 1789 | s64 bn; |
1795 | int index, nextindex, llen, rlen; | 1790 | int index, nextindex, llen, rlen; |
1796 | struct btstack btstack; /* traverse stack */ | 1791 | struct btstack btstack; /* traverse stack */ |
1797 | struct xtsplit split; /* split information */ | 1792 | struct xtsplit split; /* split information */ |
1798 | xad_t *xad; | 1793 | xad_t *xad; |
1799 | struct tlock *tlck; | 1794 | struct tlock *tlck; |
1800 | struct xtlock *xtlck = 0; | 1795 | struct xtlock *xtlck = 0; |
1801 | struct tlock *mtlck; | 1796 | struct tlock *mtlck; |
1802 | struct maplock *pxdlock; | 1797 | struct maplock *pxdlock; |
1803 | 1798 | ||
1804 | /* | 1799 | /* |
1805 | printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n", | 1800 | printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n", |
1806 | (ulong)xoff, xlen, (ulong)xaddr); | 1801 | (ulong)xoff, xlen, (ulong)xaddr); |
1807 | */ | 1802 | */ |
1808 | 1803 | ||
1809 | /* there must exist extent to be tailgated */ | 1804 | /* there must exist extent to be tailgated */ |
1810 | if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, XT_INSERT))) | 1805 | if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, XT_INSERT))) |
1811 | return rc; | 1806 | return rc; |
1812 | 1807 | ||
1813 | /* retrieve search result */ | 1808 | /* retrieve search result */ |
1814 | XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); | 1809 | XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); |
1815 | 1810 | ||
1816 | if (cmp != 0) { | 1811 | if (cmp != 0) { |
1817 | XT_PUTPAGE(mp); | 1812 | XT_PUTPAGE(mp); |
1818 | jfs_error(ip->i_sb, "xtTailgate: couldn't find extent"); | 1813 | jfs_error(ip->i_sb, "xtTailgate: couldn't find extent"); |
1819 | return -EIO; | 1814 | return -EIO; |
1820 | } | 1815 | } |
1821 | 1816 | ||
1822 | /* entry found must be last entry */ | 1817 | /* entry found must be last entry */ |
1823 | nextindex = le16_to_cpu(p->header.nextindex); | 1818 | nextindex = le16_to_cpu(p->header.nextindex); |
1824 | if (index != nextindex - 1) { | 1819 | if (index != nextindex - 1) { |
1825 | XT_PUTPAGE(mp); | 1820 | XT_PUTPAGE(mp); |
1826 | jfs_error(ip->i_sb, | 1821 | jfs_error(ip->i_sb, |
1827 | "xtTailgate: the entry found is not the last entry"); | 1822 | "xtTailgate: the entry found is not the last entry"); |
1828 | return -EIO; | 1823 | return -EIO; |
1829 | } | 1824 | } |
1830 | 1825 | ||
1831 | BT_MARK_DIRTY(mp, ip); | 1826 | BT_MARK_DIRTY(mp, ip); |
1832 | /* | 1827 | /* |
1833 | * acquire tlock of the leaf page containing original entry | 1828 | * acquire tlock of the leaf page containing original entry |
1834 | */ | 1829 | */ |
1835 | if (!test_cflag(COMMIT_Nolink, ip)) { | 1830 | if (!test_cflag(COMMIT_Nolink, ip)) { |
1836 | tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); | 1831 | tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); |
1837 | xtlck = (struct xtlock *) & tlck->lock; | 1832 | xtlck = (struct xtlock *) & tlck->lock; |
1838 | } | 1833 | } |
1839 | 1834 | ||
1840 | /* completely replace extent ? */ | 1835 | /* completely replace extent ? */ |
1841 | xad = &p->xad[index]; | 1836 | xad = &p->xad[index]; |
1842 | /* | 1837 | /* |
1843 | printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n", | 1838 | printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n", |
1844 | (ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad)); | 1839 | (ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad)); |
1845 | */ | 1840 | */ |
1846 | if ((llen = xoff - offsetXAD(xad)) == 0) | 1841 | if ((llen = xoff - offsetXAD(xad)) == 0) |
1847 | goto updateOld; | 1842 | goto updateOld; |
1848 | 1843 | ||
1849 | /* | 1844 | /* |
1850 | * partially replace extent: insert entry for new extent | 1845 | * partially replace extent: insert entry for new extent |
1851 | */ | 1846 | */ |
1852 | //insertNew: | 1847 | //insertNew: |
1853 | /* | 1848 | /* |
1854 | * if the leaf page is full, insert the new entry and | 1849 | * if the leaf page is full, insert the new entry and |
1855 | * propagate up the router entry for the new page from split | 1850 | * propagate up the router entry for the new page from split |
1856 | * | 1851 | * |
1857 | * The xtSplitUp() will insert the entry and unpin the leaf page. | 1852 | * The xtSplitUp() will insert the entry and unpin the leaf page. |
1858 | */ | 1853 | */ |
1859 | if (nextindex == le16_to_cpu(p->header.maxentry)) { | 1854 | if (nextindex == le16_to_cpu(p->header.maxentry)) { |
1860 | /* xtSpliUp() unpins leaf pages */ | 1855 | /* xtSpliUp() unpins leaf pages */ |
1861 | split.mp = mp; | 1856 | split.mp = mp; |
1862 | split.index = index + 1; | 1857 | split.index = index + 1; |
1863 | split.flag = XAD_NEW; | 1858 | split.flag = XAD_NEW; |
1864 | split.off = xoff; /* split offset */ | 1859 | split.off = xoff; /* split offset */ |
1865 | split.len = xlen; | 1860 | split.len = xlen; |
1866 | split.addr = xaddr; | 1861 | split.addr = xaddr; |
1867 | split.pxdlist = NULL; | 1862 | split.pxdlist = NULL; |
1868 | if ((rc = xtSplitUp(tid, ip, &split, &btstack))) | 1863 | if ((rc = xtSplitUp(tid, ip, &split, &btstack))) |
1869 | return rc; | 1864 | return rc; |
1870 | 1865 | ||
1871 | /* get back old page */ | 1866 | /* get back old page */ |
1872 | XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); | 1867 | XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); |
1873 | if (rc) | 1868 | if (rc) |
1874 | return rc; | 1869 | return rc; |
1875 | /* | 1870 | /* |
1876 | * if leaf root has been split, original root has been | 1871 | * if leaf root has been split, original root has been |
1877 | * copied to new child page, i.e., original entry now | 1872 | * copied to new child page, i.e., original entry now |
1878 | * resides on the new child page; | 1873 | * resides on the new child page; |
1879 | */ | 1874 | */ |
1880 | if (p->header.flag & BT_INTERNAL) { | 1875 | if (p->header.flag & BT_INTERNAL) { |
1881 | ASSERT(p->header.nextindex == | 1876 | ASSERT(p->header.nextindex == |
1882 | cpu_to_le16(XTENTRYSTART + 1)); | 1877 | cpu_to_le16(XTENTRYSTART + 1)); |
1883 | xad = &p->xad[XTENTRYSTART]; | 1878 | xad = &p->xad[XTENTRYSTART]; |
1884 | bn = addressXAD(xad); | 1879 | bn = addressXAD(xad); |
1885 | XT_PUTPAGE(mp); | 1880 | XT_PUTPAGE(mp); |
1886 | 1881 | ||
1887 | /* get new child page */ | 1882 | /* get new child page */ |
1888 | XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); | 1883 | XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); |
1889 | if (rc) | 1884 | if (rc) |
1890 | return rc; | 1885 | return rc; |
1891 | 1886 | ||
1892 | BT_MARK_DIRTY(mp, ip); | 1887 | BT_MARK_DIRTY(mp, ip); |
1893 | if (!test_cflag(COMMIT_Nolink, ip)) { | 1888 | if (!test_cflag(COMMIT_Nolink, ip)) { |
1894 | tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); | 1889 | tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); |
1895 | xtlck = (struct xtlock *) & tlck->lock; | 1890 | xtlck = (struct xtlock *) & tlck->lock; |
1896 | } | 1891 | } |
1897 | } | 1892 | } |
1898 | } | 1893 | } |
1899 | /* | 1894 | /* |
1900 | * insert the new entry into the leaf page | 1895 | * insert the new entry into the leaf page |
1901 | */ | 1896 | */ |
1902 | else { | 1897 | else { |
1903 | /* insert the new entry: mark the entry NEW */ | 1898 | /* insert the new entry: mark the entry NEW */ |
1904 | xad = &p->xad[index + 1]; | 1899 | xad = &p->xad[index + 1]; |
1905 | XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr); | 1900 | XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr); |
1906 | 1901 | ||
1907 | /* advance next available entry index */ | 1902 | /* advance next available entry index */ |
1908 | p->header.nextindex = | 1903 | le16_add_cpu(&p->header.nextindex, 1); |
1909 | cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); | ||
1910 | } | 1904 | } |
1911 | 1905 | ||
1912 | /* get back old XAD */ | 1906 | /* get back old XAD */ |
1913 | xad = &p->xad[index]; | 1907 | xad = &p->xad[index]; |
1914 | 1908 | ||
1915 | /* | 1909 | /* |
1916 | * truncate/relocate old extent at split offset | 1910 | * truncate/relocate old extent at split offset |
1917 | */ | 1911 | */ |
1918 | updateOld: | 1912 | updateOld: |
1919 | /* update dmap for old/committed/truncated extent */ | 1913 | /* update dmap for old/committed/truncated extent */ |
1920 | rlen = lengthXAD(xad) - llen; | 1914 | rlen = lengthXAD(xad) - llen; |
1921 | if (!(xad->flag & XAD_NEW)) { | 1915 | if (!(xad->flag & XAD_NEW)) { |
1922 | /* free from PWMAP at commit */ | 1916 | /* free from PWMAP at commit */ |
1923 | if (!test_cflag(COMMIT_Nolink, ip)) { | 1917 | if (!test_cflag(COMMIT_Nolink, ip)) { |
1924 | mtlck = txMaplock(tid, ip, tlckMAP); | 1918 | mtlck = txMaplock(tid, ip, tlckMAP); |
1925 | pxdlock = (struct maplock *) & mtlck->lock; | 1919 | pxdlock = (struct maplock *) & mtlck->lock; |
1926 | pxdlock->flag = mlckFREEPXD; | 1920 | pxdlock->flag = mlckFREEPXD; |
1927 | PXDaddress(&pxdlock->pxd, addressXAD(xad) + llen); | 1921 | PXDaddress(&pxdlock->pxd, addressXAD(xad) + llen); |
1928 | PXDlength(&pxdlock->pxd, rlen); | 1922 | PXDlength(&pxdlock->pxd, rlen); |
1929 | pxdlock->index = 1; | 1923 | pxdlock->index = 1; |
1930 | } | 1924 | } |
1931 | } else | 1925 | } else |
1932 | /* free from WMAP */ | 1926 | /* free from WMAP */ |
1933 | dbFree(ip, addressXAD(xad) + llen, (s64) rlen); | 1927 | dbFree(ip, addressXAD(xad) + llen, (s64) rlen); |
1934 | 1928 | ||
1935 | if (llen) | 1929 | if (llen) |
1936 | /* truncate */ | 1930 | /* truncate */ |
1937 | XADlength(xad, llen); | 1931 | XADlength(xad, llen); |
1938 | else | 1932 | else |
1939 | /* replace */ | 1933 | /* replace */ |
1940 | XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr); | 1934 | XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr); |
1941 | 1935 | ||
1942 | if (!test_cflag(COMMIT_Nolink, ip)) { | 1936 | if (!test_cflag(COMMIT_Nolink, ip)) { |
1943 | xtlck->lwm.offset = (xtlck->lwm.offset) ? | 1937 | xtlck->lwm.offset = (xtlck->lwm.offset) ? |
1944 | min(index, (int)xtlck->lwm.offset) : index; | 1938 | min(index, (int)xtlck->lwm.offset) : index; |
1945 | xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - | 1939 | xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - |
1946 | xtlck->lwm.offset; | 1940 | xtlck->lwm.offset; |
1947 | } | 1941 | } |
1948 | 1942 | ||
1949 | /* unpin the leaf page */ | 1943 | /* unpin the leaf page */ |
1950 | XT_PUTPAGE(mp); | 1944 | XT_PUTPAGE(mp); |
1951 | 1945 | ||
1952 | return rc; | 1946 | return rc; |
1953 | } | 1947 | } |
1954 | #endif /* _NOTYET */ | 1948 | #endif /* _NOTYET */ |
1955 | 1949 | ||
1956 | /* | 1950 | /* |
1957 | * xtUpdate() | 1951 | * xtUpdate() |
1958 | * | 1952 | * |
1959 | * function: update XAD; | 1953 | * function: update XAD; |
1960 | * | 1954 | * |
1961 | * update extent for allocated_but_not_recorded or | 1955 | * update extent for allocated_but_not_recorded or |
1962 | * compressed extent; | 1956 | * compressed extent; |
1963 | * | 1957 | * |
1964 | * parameter: | 1958 | * parameter: |
1965 | * nxad - new XAD; | 1959 | * nxad - new XAD; |
1966 | * logical extent of the specified XAD must be completely | 1960 | * logical extent of the specified XAD must be completely |
1967 | * contained by an existing XAD; | 1961 | * contained by an existing XAD; |
1968 | */ | 1962 | */ |
1969 | int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad) | 1963 | int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad) |
1970 | { /* new XAD */ | 1964 | { /* new XAD */ |
1971 | int rc = 0; | 1965 | int rc = 0; |
1972 | int cmp; | 1966 | int cmp; |
1973 | struct metapage *mp; /* meta-page buffer */ | 1967 | struct metapage *mp; /* meta-page buffer */ |
1974 | xtpage_t *p; /* base B+-tree index page */ | 1968 | xtpage_t *p; /* base B+-tree index page */ |
1975 | s64 bn; | 1969 | s64 bn; |
1976 | int index0, index, newindex, nextindex; | 1970 | int index0, index, newindex, nextindex; |
1977 | struct btstack btstack; /* traverse stack */ | 1971 | struct btstack btstack; /* traverse stack */ |
1978 | struct xtsplit split; /* split information */ | 1972 | struct xtsplit split; /* split information */ |
1979 | xad_t *xad, *lxad, *rxad; | 1973 | xad_t *xad, *lxad, *rxad; |
1980 | int xflag; | 1974 | int xflag; |
1981 | s64 nxoff, xoff; | 1975 | s64 nxoff, xoff; |
1982 | int nxlen, xlen, lxlen, rxlen; | 1976 | int nxlen, xlen, lxlen, rxlen; |
1983 | s64 nxaddr, xaddr; | 1977 | s64 nxaddr, xaddr; |
1984 | struct tlock *tlck; | 1978 | struct tlock *tlck; |
1985 | struct xtlock *xtlck = NULL; | 1979 | struct xtlock *xtlck = NULL; |
1986 | int newpage = 0; | 1980 | int newpage = 0; |
1987 | 1981 | ||
1988 | /* there must exist extent to be tailgated */ | 1982 | /* there must exist extent to be tailgated */ |
1989 | nxoff = offsetXAD(nxad); | 1983 | nxoff = offsetXAD(nxad); |
1990 | nxlen = lengthXAD(nxad); | 1984 | nxlen = lengthXAD(nxad); |
1991 | nxaddr = addressXAD(nxad); | 1985 | nxaddr = addressXAD(nxad); |
1992 | 1986 | ||
1993 | if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT))) | 1987 | if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT))) |
1994 | return rc; | 1988 | return rc; |
1995 | 1989 | ||
1996 | /* retrieve search result */ | 1990 | /* retrieve search result */ |
1997 | XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0); | 1991 | XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0); |
1998 | 1992 | ||
1999 | if (cmp != 0) { | 1993 | if (cmp != 0) { |
2000 | XT_PUTPAGE(mp); | 1994 | XT_PUTPAGE(mp); |
2001 | jfs_error(ip->i_sb, "xtUpdate: Could not find extent"); | 1995 | jfs_error(ip->i_sb, "xtUpdate: Could not find extent"); |
2002 | return -EIO; | 1996 | return -EIO; |
2003 | } | 1997 | } |
2004 | 1998 | ||
2005 | BT_MARK_DIRTY(mp, ip); | 1999 | BT_MARK_DIRTY(mp, ip); |
2006 | /* | 2000 | /* |
2007 | * acquire tlock of the leaf page containing original entry | 2001 | * acquire tlock of the leaf page containing original entry |
2008 | */ | 2002 | */ |
2009 | if (!test_cflag(COMMIT_Nolink, ip)) { | 2003 | if (!test_cflag(COMMIT_Nolink, ip)) { |
2010 | tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); | 2004 | tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); |
2011 | xtlck = (struct xtlock *) & tlck->lock; | 2005 | xtlck = (struct xtlock *) & tlck->lock; |
2012 | } | 2006 | } |
2013 | 2007 | ||
2014 | xad = &p->xad[index0]; | 2008 | xad = &p->xad[index0]; |
2015 | xflag = xad->flag; | 2009 | xflag = xad->flag; |
2016 | xoff = offsetXAD(xad); | 2010 | xoff = offsetXAD(xad); |
2017 | xlen = lengthXAD(xad); | 2011 | xlen = lengthXAD(xad); |
2018 | xaddr = addressXAD(xad); | 2012 | xaddr = addressXAD(xad); |
2019 | 2013 | ||
2020 | /* nXAD must be completely contained within XAD */ | 2014 | /* nXAD must be completely contained within XAD */ |
2021 | if ((xoff > nxoff) || | 2015 | if ((xoff > nxoff) || |
2022 | (nxoff + nxlen > xoff + xlen)) { | 2016 | (nxoff + nxlen > xoff + xlen)) { |
2023 | XT_PUTPAGE(mp); | 2017 | XT_PUTPAGE(mp); |
2024 | jfs_error(ip->i_sb, | 2018 | jfs_error(ip->i_sb, |
2025 | "xtUpdate: nXAD in not completely contained within XAD"); | 2019 | "xtUpdate: nXAD in not completely contained within XAD"); |
2026 | return -EIO; | 2020 | return -EIO; |
2027 | } | 2021 | } |
2028 | 2022 | ||
2029 | index = index0; | 2023 | index = index0; |
2030 | newindex = index + 1; | 2024 | newindex = index + 1; |
2031 | nextindex = le16_to_cpu(p->header.nextindex); | 2025 | nextindex = le16_to_cpu(p->header.nextindex); |
2032 | 2026 | ||
2033 | #ifdef _JFS_WIP_NOCOALESCE | 2027 | #ifdef _JFS_WIP_NOCOALESCE |
2034 | if (xoff < nxoff) | 2028 | if (xoff < nxoff) |
2035 | goto updateRight; | 2029 | goto updateRight; |
2036 | 2030 | ||
2037 | /* | 2031 | /* |
2038 | * replace XAD with nXAD | 2032 | * replace XAD with nXAD |
2039 | */ | 2033 | */ |
2040 | replace: /* (nxoff == xoff) */ | 2034 | replace: /* (nxoff == xoff) */ |
2041 | if (nxlen == xlen) { | 2035 | if (nxlen == xlen) { |
2042 | /* replace XAD with nXAD:recorded */ | 2036 | /* replace XAD with nXAD:recorded */ |
2043 | *xad = *nxad; | 2037 | *xad = *nxad; |
2044 | xad->flag = xflag & ~XAD_NOTRECORDED; | 2038 | xad->flag = xflag & ~XAD_NOTRECORDED; |
2045 | 2039 | ||
2046 | goto out; | 2040 | goto out; |
2047 | } else /* (nxlen < xlen) */ | 2041 | } else /* (nxlen < xlen) */ |
2048 | goto updateLeft; | 2042 | goto updateLeft; |
2049 | #endif /* _JFS_WIP_NOCOALESCE */ | 2043 | #endif /* _JFS_WIP_NOCOALESCE */ |
2050 | 2044 | ||
2051 | /* #ifdef _JFS_WIP_COALESCE */ | 2045 | /* #ifdef _JFS_WIP_COALESCE */ |
2052 | if (xoff < nxoff) | 2046 | if (xoff < nxoff) |
2053 | goto coalesceRight; | 2047 | goto coalesceRight; |
2054 | 2048 | ||
2055 | /* | 2049 | /* |
2056 | * coalesce with left XAD | 2050 | * coalesce with left XAD |
2057 | */ | 2051 | */ |
2058 | //coalesceLeft: /* (xoff == nxoff) */ | 2052 | //coalesceLeft: /* (xoff == nxoff) */ |
2059 | /* is XAD first entry of page ? */ | 2053 | /* is XAD first entry of page ? */ |
2060 | if (index == XTENTRYSTART) | 2054 | if (index == XTENTRYSTART) |
2061 | goto replace; | 2055 | goto replace; |
2062 | 2056 | ||
2063 | /* is nXAD logically and physically contiguous with lXAD ? */ | 2057 | /* is nXAD logically and physically contiguous with lXAD ? */ |
2064 | lxad = &p->xad[index - 1]; | 2058 | lxad = &p->xad[index - 1]; |
2065 | lxlen = lengthXAD(lxad); | 2059 | lxlen = lengthXAD(lxad); |
2066 | if (!(lxad->flag & XAD_NOTRECORDED) && | 2060 | if (!(lxad->flag & XAD_NOTRECORDED) && |
2067 | (nxoff == offsetXAD(lxad) + lxlen) && | 2061 | (nxoff == offsetXAD(lxad) + lxlen) && |
2068 | (nxaddr == addressXAD(lxad) + lxlen) && | 2062 | (nxaddr == addressXAD(lxad) + lxlen) && |
2069 | (lxlen + nxlen < MAXXLEN)) { | 2063 | (lxlen + nxlen < MAXXLEN)) { |
2070 | /* extend right lXAD */ | 2064 | /* extend right lXAD */ |
2071 | index0 = index - 1; | 2065 | index0 = index - 1; |
2072 | XADlength(lxad, lxlen + nxlen); | 2066 | XADlength(lxad, lxlen + nxlen); |
2073 | 2067 | ||
2074 | /* If we just merged two extents together, need to make sure the | 2068 | /* If we just merged two extents together, need to make sure the |
2075 | * right extent gets logged. If the left one is marked XAD_NEW, | 2069 | * right extent gets logged. If the left one is marked XAD_NEW, |
2076 | * then we know it will be logged. Otherwise, mark as | 2070 | * then we know it will be logged. Otherwise, mark as |
2077 | * XAD_EXTENDED | 2071 | * XAD_EXTENDED |
2078 | */ | 2072 | */ |
2079 | if (!(lxad->flag & XAD_NEW)) | 2073 | if (!(lxad->flag & XAD_NEW)) |
2080 | lxad->flag |= XAD_EXTENDED; | 2074 | lxad->flag |= XAD_EXTENDED; |
2081 | 2075 | ||
2082 | if (xlen > nxlen) { | 2076 | if (xlen > nxlen) { |
2083 | /* truncate XAD */ | 2077 | /* truncate XAD */ |
2084 | XADoffset(xad, xoff + nxlen); | 2078 | XADoffset(xad, xoff + nxlen); |
2085 | XADlength(xad, xlen - nxlen); | 2079 | XADlength(xad, xlen - nxlen); |
2086 | XADaddress(xad, xaddr + nxlen); | 2080 | XADaddress(xad, xaddr + nxlen); |
2087 | goto out; | 2081 | goto out; |
2088 | } else { /* (xlen == nxlen) */ | 2082 | } else { /* (xlen == nxlen) */ |
2089 | 2083 | ||
2090 | /* remove XAD */ | 2084 | /* remove XAD */ |
2091 | if (index < nextindex - 1) | 2085 | if (index < nextindex - 1) |
2092 | memmove(&p->xad[index], &p->xad[index + 1], | 2086 | memmove(&p->xad[index], &p->xad[index + 1], |
2093 | (nextindex - index - | 2087 | (nextindex - index - |
2094 | 1) << L2XTSLOTSIZE); | 2088 | 1) << L2XTSLOTSIZE); |
2095 | 2089 | ||
2096 | p->header.nextindex = | 2090 | p->header.nextindex = |
2097 | cpu_to_le16(le16_to_cpu(p->header.nextindex) - | 2091 | cpu_to_le16(le16_to_cpu(p->header.nextindex) - |
2098 | 1); | 2092 | 1); |
2099 | 2093 | ||
2100 | index = index0; | 2094 | index = index0; |
2101 | newindex = index + 1; | 2095 | newindex = index + 1; |
2102 | nextindex = le16_to_cpu(p->header.nextindex); | 2096 | nextindex = le16_to_cpu(p->header.nextindex); |
2103 | xoff = nxoff = offsetXAD(lxad); | 2097 | xoff = nxoff = offsetXAD(lxad); |
2104 | xlen = nxlen = lxlen + nxlen; | 2098 | xlen = nxlen = lxlen + nxlen; |
2105 | xaddr = nxaddr = addressXAD(lxad); | 2099 | xaddr = nxaddr = addressXAD(lxad); |
2106 | goto coalesceRight; | 2100 | goto coalesceRight; |
2107 | } | 2101 | } |
2108 | } | 2102 | } |
2109 | 2103 | ||
2110 | /* | 2104 | /* |
2111 | * replace XAD with nXAD | 2105 | * replace XAD with nXAD |
2112 | */ | 2106 | */ |
2113 | replace: /* (nxoff == xoff) */ | 2107 | replace: /* (nxoff == xoff) */ |
2114 | if (nxlen == xlen) { | 2108 | if (nxlen == xlen) { |
2115 | /* replace XAD with nXAD:recorded */ | 2109 | /* replace XAD with nXAD:recorded */ |
2116 | *xad = *nxad; | 2110 | *xad = *nxad; |
2117 | xad->flag = xflag & ~XAD_NOTRECORDED; | 2111 | xad->flag = xflag & ~XAD_NOTRECORDED; |
2118 | 2112 | ||
2119 | goto coalesceRight; | 2113 | goto coalesceRight; |
2120 | } else /* (nxlen < xlen) */ | 2114 | } else /* (nxlen < xlen) */ |
2121 | goto updateLeft; | 2115 | goto updateLeft; |
2122 | 2116 | ||
2123 | /* | 2117 | /* |
2124 | * coalesce with right XAD | 2118 | * coalesce with right XAD |
2125 | */ | 2119 | */ |
2126 | coalesceRight: /* (xoff <= nxoff) */ | 2120 | coalesceRight: /* (xoff <= nxoff) */ |
2127 | /* is XAD last entry of page ? */ | 2121 | /* is XAD last entry of page ? */ |
2128 | if (newindex == nextindex) { | 2122 | if (newindex == nextindex) { |
2129 | if (xoff == nxoff) | 2123 | if (xoff == nxoff) |
2130 | goto out; | 2124 | goto out; |
2131 | goto updateRight; | 2125 | goto updateRight; |
2132 | } | 2126 | } |
2133 | 2127 | ||
2134 | /* is nXAD logically and physically contiguous with rXAD ? */ | 2128 | /* is nXAD logically and physically contiguous with rXAD ? */ |
2135 | rxad = &p->xad[index + 1]; | 2129 | rxad = &p->xad[index + 1]; |
2136 | rxlen = lengthXAD(rxad); | 2130 | rxlen = lengthXAD(rxad); |
2137 | if (!(rxad->flag & XAD_NOTRECORDED) && | 2131 | if (!(rxad->flag & XAD_NOTRECORDED) && |
2138 | (nxoff + nxlen == offsetXAD(rxad)) && | 2132 | (nxoff + nxlen == offsetXAD(rxad)) && |
2139 | (nxaddr + nxlen == addressXAD(rxad)) && | 2133 | (nxaddr + nxlen == addressXAD(rxad)) && |
2140 | (rxlen + nxlen < MAXXLEN)) { | 2134 | (rxlen + nxlen < MAXXLEN)) { |
2141 | /* extend left rXAD */ | 2135 | /* extend left rXAD */ |
2142 | XADoffset(rxad, nxoff); | 2136 | XADoffset(rxad, nxoff); |
2143 | XADlength(rxad, rxlen + nxlen); | 2137 | XADlength(rxad, rxlen + nxlen); |
2144 | XADaddress(rxad, nxaddr); | 2138 | XADaddress(rxad, nxaddr); |
2145 | 2139 | ||
2146 | /* If we just merged two extents together, need to make sure | 2140 | /* If we just merged two extents together, need to make sure |
2147 | * the left extent gets logged. If the right one is marked | 2141 | * the left extent gets logged. If the right one is marked |
2148 | * XAD_NEW, then we know it will be logged. Otherwise, mark as | 2142 | * XAD_NEW, then we know it will be logged. Otherwise, mark as |
2149 | * XAD_EXTENDED | 2143 | * XAD_EXTENDED |
2150 | */ | 2144 | */ |
2151 | if (!(rxad->flag & XAD_NEW)) | 2145 | if (!(rxad->flag & XAD_NEW)) |
2152 | rxad->flag |= XAD_EXTENDED; | 2146 | rxad->flag |= XAD_EXTENDED; |
2153 | 2147 | ||
2154 | if (xlen > nxlen) | 2148 | if (xlen > nxlen) |
2155 | /* truncate XAD */ | 2149 | /* truncate XAD */ |
2156 | XADlength(xad, xlen - nxlen); | 2150 | XADlength(xad, xlen - nxlen); |
2157 | else { /* (xlen == nxlen) */ | 2151 | else { /* (xlen == nxlen) */ |
2158 | 2152 | ||
2159 | /* remove XAD */ | 2153 | /* remove XAD */ |
2160 | memmove(&p->xad[index], &p->xad[index + 1], | 2154 | memmove(&p->xad[index], &p->xad[index + 1], |
2161 | (nextindex - index - 1) << L2XTSLOTSIZE); | 2155 | (nextindex - index - 1) << L2XTSLOTSIZE); |
2162 | 2156 | ||
2163 | p->header.nextindex = | 2157 | p->header.nextindex = |
2164 | cpu_to_le16(le16_to_cpu(p->header.nextindex) - | 2158 | cpu_to_le16(le16_to_cpu(p->header.nextindex) - |
2165 | 1); | 2159 | 1); |
2166 | } | 2160 | } |
2167 | 2161 | ||
2168 | goto out; | 2162 | goto out; |
2169 | } else if (xoff == nxoff) | 2163 | } else if (xoff == nxoff) |
2170 | goto out; | 2164 | goto out; |
2171 | 2165 | ||
2172 | if (xoff >= nxoff) { | 2166 | if (xoff >= nxoff) { |
2173 | XT_PUTPAGE(mp); | 2167 | XT_PUTPAGE(mp); |
2174 | jfs_error(ip->i_sb, "xtUpdate: xoff >= nxoff"); | 2168 | jfs_error(ip->i_sb, "xtUpdate: xoff >= nxoff"); |
2175 | return -EIO; | 2169 | return -EIO; |
2176 | } | 2170 | } |
2177 | /* #endif _JFS_WIP_COALESCE */ | 2171 | /* #endif _JFS_WIP_COALESCE */ |
2178 | 2172 | ||
2179 | /* | 2173 | /* |
2180 | * split XAD into (lXAD, nXAD): | 2174 | * split XAD into (lXAD, nXAD): |
2181 | * | 2175 | * |
2182 | * |---nXAD---> | 2176 | * |---nXAD---> |
2183 | * --|----------XAD----------|-- | 2177 | * --|----------XAD----------|-- |
2184 | * |-lXAD-| | 2178 | * |-lXAD-| |
2185 | */ | 2179 | */ |
2186 | updateRight: /* (xoff < nxoff) */ | 2180 | updateRight: /* (xoff < nxoff) */ |
2187 | /* truncate old XAD as lXAD:not_recorded */ | 2181 | /* truncate old XAD as lXAD:not_recorded */ |
2188 | xad = &p->xad[index]; | 2182 | xad = &p->xad[index]; |
2189 | XADlength(xad, nxoff - xoff); | 2183 | XADlength(xad, nxoff - xoff); |
2190 | 2184 | ||
2191 | /* insert nXAD:recorded */ | 2185 | /* insert nXAD:recorded */ |
2192 | if (nextindex == le16_to_cpu(p->header.maxentry)) { | 2186 | if (nextindex == le16_to_cpu(p->header.maxentry)) { |
2193 | 2187 | ||
2194 | /* xtSpliUp() unpins leaf pages */ | 2188 | /* xtSpliUp() unpins leaf pages */ |
2195 | split.mp = mp; | 2189 | split.mp = mp; |
2196 | split.index = newindex; | 2190 | split.index = newindex; |
2197 | split.flag = xflag & ~XAD_NOTRECORDED; | 2191 | split.flag = xflag & ~XAD_NOTRECORDED; |
2198 | split.off = nxoff; | 2192 | split.off = nxoff; |
2199 | split.len = nxlen; | 2193 | split.len = nxlen; |
2200 | split.addr = nxaddr; | 2194 | split.addr = nxaddr; |
2201 | split.pxdlist = NULL; | 2195 | split.pxdlist = NULL; |
2202 | if ((rc = xtSplitUp(tid, ip, &split, &btstack))) | 2196 | if ((rc = xtSplitUp(tid, ip, &split, &btstack))) |
2203 | return rc; | 2197 | return rc; |
2204 | 2198 | ||
2205 | /* get back old page */ | 2199 | /* get back old page */ |
2206 | XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); | 2200 | XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); |
2207 | if (rc) | 2201 | if (rc) |
2208 | return rc; | 2202 | return rc; |
2209 | /* | 2203 | /* |
2210 | * if leaf root has been split, original root has been | 2204 | * if leaf root has been split, original root has been |
2211 | * copied to new child page, i.e., original entry now | 2205 | * copied to new child page, i.e., original entry now |
2212 | * resides on the new child page; | 2206 | * resides on the new child page; |
2213 | */ | 2207 | */ |
2214 | if (p->header.flag & BT_INTERNAL) { | 2208 | if (p->header.flag & BT_INTERNAL) { |
2215 | ASSERT(p->header.nextindex == | 2209 | ASSERT(p->header.nextindex == |
2216 | cpu_to_le16(XTENTRYSTART + 1)); | 2210 | cpu_to_le16(XTENTRYSTART + 1)); |
2217 | xad = &p->xad[XTENTRYSTART]; | 2211 | xad = &p->xad[XTENTRYSTART]; |
2218 | bn = addressXAD(xad); | 2212 | bn = addressXAD(xad); |
2219 | XT_PUTPAGE(mp); | 2213 | XT_PUTPAGE(mp); |
2220 | 2214 | ||
2221 | /* get new child page */ | 2215 | /* get new child page */ |
2222 | XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); | 2216 | XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); |
2223 | if (rc) | 2217 | if (rc) |
2224 | return rc; | 2218 | return rc; |
2225 | 2219 | ||
2226 | BT_MARK_DIRTY(mp, ip); | 2220 | BT_MARK_DIRTY(mp, ip); |
2227 | if (!test_cflag(COMMIT_Nolink, ip)) { | 2221 | if (!test_cflag(COMMIT_Nolink, ip)) { |
2228 | tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); | 2222 | tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); |
2229 | xtlck = (struct xtlock *) & tlck->lock; | 2223 | xtlck = (struct xtlock *) & tlck->lock; |
2230 | } | 2224 | } |
2231 | } else { | 2225 | } else { |
2232 | /* is nXAD on new page ? */ | 2226 | /* is nXAD on new page ? */ |
2233 | if (newindex > | 2227 | if (newindex > |
2234 | (le16_to_cpu(p->header.maxentry) >> 1)) { | 2228 | (le16_to_cpu(p->header.maxentry) >> 1)) { |
2235 | newindex = | 2229 | newindex = |
2236 | newindex - | 2230 | newindex - |
2237 | le16_to_cpu(p->header.nextindex) + | 2231 | le16_to_cpu(p->header.nextindex) + |
2238 | XTENTRYSTART; | 2232 | XTENTRYSTART; |
2239 | newpage = 1; | 2233 | newpage = 1; |
2240 | } | 2234 | } |
2241 | } | 2235 | } |
2242 | } else { | 2236 | } else { |
2243 | /* if insert into middle, shift right remaining entries */ | 2237 | /* if insert into middle, shift right remaining entries */ |
2244 | if (newindex < nextindex) | 2238 | if (newindex < nextindex) |
2245 | memmove(&p->xad[newindex + 1], &p->xad[newindex], | 2239 | memmove(&p->xad[newindex + 1], &p->xad[newindex], |
2246 | (nextindex - newindex) << L2XTSLOTSIZE); | 2240 | (nextindex - newindex) << L2XTSLOTSIZE); |
2247 | 2241 | ||
2248 | /* insert the entry */ | 2242 | /* insert the entry */ |
2249 | xad = &p->xad[newindex]; | 2243 | xad = &p->xad[newindex]; |
2250 | *xad = *nxad; | 2244 | *xad = *nxad; |
2251 | xad->flag = xflag & ~XAD_NOTRECORDED; | 2245 | xad->flag = xflag & ~XAD_NOTRECORDED; |
2252 | 2246 | ||
2253 | /* advance next available entry index. */ | 2247 | /* advance next available entry index. */ |
2254 | p->header.nextindex = | 2248 | p->header.nextindex = |
2255 | cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); | 2249 | cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); |
2256 | } | 2250 | } |
2257 | 2251 | ||
2258 | /* | 2252 | /* |
2259 | * does nXAD force 3-way split ? | 2253 | * does nXAD force 3-way split ? |
2260 | * | 2254 | * |
2261 | * |---nXAD--->| | 2255 | * |---nXAD--->| |
2262 | * --|----------XAD-------------|-- | 2256 | * --|----------XAD-------------|-- |
2263 | * |-lXAD-| |-rXAD -| | 2257 | * |-lXAD-| |-rXAD -| |
2264 | */ | 2258 | */ |
2265 | if (nxoff + nxlen == xoff + xlen) | 2259 | if (nxoff + nxlen == xoff + xlen) |
2266 | goto out; | 2260 | goto out; |
2267 | 2261 | ||
2268 | /* reorient nXAD as XAD for further split XAD into (nXAD, rXAD) */ | 2262 | /* reorient nXAD as XAD for further split XAD into (nXAD, rXAD) */ |
2269 | if (newpage) { | 2263 | if (newpage) { |
2270 | /* close out old page */ | 2264 | /* close out old page */ |
2271 | if (!test_cflag(COMMIT_Nolink, ip)) { | 2265 | if (!test_cflag(COMMIT_Nolink, ip)) { |
2272 | xtlck->lwm.offset = (xtlck->lwm.offset) ? | 2266 | xtlck->lwm.offset = (xtlck->lwm.offset) ? |
2273 | min(index0, (int)xtlck->lwm.offset) : index0; | 2267 | min(index0, (int)xtlck->lwm.offset) : index0; |
2274 | xtlck->lwm.length = | 2268 | xtlck->lwm.length = |
2275 | le16_to_cpu(p->header.nextindex) - | 2269 | le16_to_cpu(p->header.nextindex) - |
2276 | xtlck->lwm.offset; | 2270 | xtlck->lwm.offset; |
2277 | } | 2271 | } |
2278 | 2272 | ||
2279 | bn = le64_to_cpu(p->header.next); | 2273 | bn = le64_to_cpu(p->header.next); |
2280 | XT_PUTPAGE(mp); | 2274 | XT_PUTPAGE(mp); |
2281 | 2275 | ||
2282 | /* get new right page */ | 2276 | /* get new right page */ |
2283 | XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); | 2277 | XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); |
2284 | if (rc) | 2278 | if (rc) |
2285 | return rc; | 2279 | return rc; |
2286 | 2280 | ||
2287 | BT_MARK_DIRTY(mp, ip); | 2281 | BT_MARK_DIRTY(mp, ip); |
2288 | if (!test_cflag(COMMIT_Nolink, ip)) { | 2282 | if (!test_cflag(COMMIT_Nolink, ip)) { |
2289 | tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); | 2283 | tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); |
2290 | xtlck = (struct xtlock *) & tlck->lock; | 2284 | xtlck = (struct xtlock *) & tlck->lock; |
2291 | } | 2285 | } |
2292 | 2286 | ||
2293 | index0 = index = newindex; | 2287 | index0 = index = newindex; |
2294 | } else | 2288 | } else |
2295 | index++; | 2289 | index++; |
2296 | 2290 | ||
2297 | newindex = index + 1; | 2291 | newindex = index + 1; |
2298 | nextindex = le16_to_cpu(p->header.nextindex); | 2292 | nextindex = le16_to_cpu(p->header.nextindex); |
2299 | xlen = xlen - (nxoff - xoff); | 2293 | xlen = xlen - (nxoff - xoff); |
2300 | xoff = nxoff; | 2294 | xoff = nxoff; |
2301 | xaddr = nxaddr; | 2295 | xaddr = nxaddr; |
2302 | 2296 | ||
2303 | /* recompute split pages */ | 2297 | /* recompute split pages */ |
2304 | if (nextindex == le16_to_cpu(p->header.maxentry)) { | 2298 | if (nextindex == le16_to_cpu(p->header.maxentry)) { |
2305 | XT_PUTPAGE(mp); | 2299 | XT_PUTPAGE(mp); |
2306 | 2300 | ||
2307 | if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT))) | 2301 | if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT))) |
2308 | return rc; | 2302 | return rc; |
2309 | 2303 | ||
2310 | /* retrieve search result */ | 2304 | /* retrieve search result */ |
2311 | XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0); | 2305 | XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0); |
2312 | 2306 | ||
2313 | if (cmp != 0) { | 2307 | if (cmp != 0) { |
2314 | XT_PUTPAGE(mp); | 2308 | XT_PUTPAGE(mp); |
2315 | jfs_error(ip->i_sb, "xtUpdate: xtSearch failed"); | 2309 | jfs_error(ip->i_sb, "xtUpdate: xtSearch failed"); |
2316 | return -EIO; | 2310 | return -EIO; |
2317 | } | 2311 | } |
2318 | 2312 | ||
2319 | if (index0 != index) { | 2313 | if (index0 != index) { |
2320 | XT_PUTPAGE(mp); | 2314 | XT_PUTPAGE(mp); |
2321 | jfs_error(ip->i_sb, | 2315 | jfs_error(ip->i_sb, |
2322 | "xtUpdate: unexpected value of index"); | 2316 | "xtUpdate: unexpected value of index"); |
2323 | return -EIO; | 2317 | return -EIO; |
2324 | } | 2318 | } |
2325 | } | 2319 | } |
2326 | 2320 | ||
2327 | /* | 2321 | /* |
2328 | * split XAD into (nXAD, rXAD) | 2322 | * split XAD into (nXAD, rXAD) |
2329 | * | 2323 | * |
2330 | * ---nXAD---| | 2324 | * ---nXAD---| |
2331 | * --|----------XAD----------|-- | 2325 | * --|----------XAD----------|-- |
2332 | * |-rXAD-| | 2326 | * |-rXAD-| |
2333 | */ | 2327 | */ |
2334 | updateLeft: /* (nxoff == xoff) && (nxlen < xlen) */ | 2328 | updateLeft: /* (nxoff == xoff) && (nxlen < xlen) */ |
2335 | /* update old XAD with nXAD:recorded */ | 2329 | /* update old XAD with nXAD:recorded */ |
2336 | xad = &p->xad[index]; | 2330 | xad = &p->xad[index]; |
2337 | *xad = *nxad; | 2331 | *xad = *nxad; |
2338 | xad->flag = xflag & ~XAD_NOTRECORDED; | 2332 | xad->flag = xflag & ~XAD_NOTRECORDED; |
2339 | 2333 | ||
2340 | /* insert rXAD:not_recorded */ | 2334 | /* insert rXAD:not_recorded */ |
2341 | xoff = xoff + nxlen; | 2335 | xoff = xoff + nxlen; |
2342 | xlen = xlen - nxlen; | 2336 | xlen = xlen - nxlen; |
2343 | xaddr = xaddr + nxlen; | 2337 | xaddr = xaddr + nxlen; |
2344 | if (nextindex == le16_to_cpu(p->header.maxentry)) { | 2338 | if (nextindex == le16_to_cpu(p->header.maxentry)) { |
2345 | /* | 2339 | /* |
2346 | printf("xtUpdate.updateLeft.split p:0x%p\n", p); | 2340 | printf("xtUpdate.updateLeft.split p:0x%p\n", p); |
2347 | */ | 2341 | */ |
2348 | /* xtSpliUp() unpins leaf pages */ | 2342 | /* xtSpliUp() unpins leaf pages */ |
2349 | split.mp = mp; | 2343 | split.mp = mp; |
2350 | split.index = newindex; | 2344 | split.index = newindex; |
2351 | split.flag = xflag; | 2345 | split.flag = xflag; |
2352 | split.off = xoff; | 2346 | split.off = xoff; |
2353 | split.len = xlen; | 2347 | split.len = xlen; |
2354 | split.addr = xaddr; | 2348 | split.addr = xaddr; |
2355 | split.pxdlist = NULL; | 2349 | split.pxdlist = NULL; |
2356 | if ((rc = xtSplitUp(tid, ip, &split, &btstack))) | 2350 | if ((rc = xtSplitUp(tid, ip, &split, &btstack))) |
2357 | return rc; | 2351 | return rc; |
2358 | 2352 | ||
2359 | /* get back old page */ | 2353 | /* get back old page */ |
2360 | XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); | 2354 | XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); |
2361 | if (rc) | 2355 | if (rc) |
2362 | return rc; | 2356 | return rc; |
2363 | 2357 | ||
2364 | /* | 2358 | /* |
2365 | * if leaf root has been split, original root has been | 2359 | * if leaf root has been split, original root has been |
2366 | * copied to new child page, i.e., original entry now | 2360 | * copied to new child page, i.e., original entry now |
2367 | * resides on the new child page; | 2361 | * resides on the new child page; |
2368 | */ | 2362 | */ |
2369 | if (p->header.flag & BT_INTERNAL) { | 2363 | if (p->header.flag & BT_INTERNAL) { |
2370 | ASSERT(p->header.nextindex == | 2364 | ASSERT(p->header.nextindex == |
2371 | cpu_to_le16(XTENTRYSTART + 1)); | 2365 | cpu_to_le16(XTENTRYSTART + 1)); |
2372 | xad = &p->xad[XTENTRYSTART]; | 2366 | xad = &p->xad[XTENTRYSTART]; |
2373 | bn = addressXAD(xad); | 2367 | bn = addressXAD(xad); |
2374 | XT_PUTPAGE(mp); | 2368 | XT_PUTPAGE(mp); |
2375 | 2369 | ||
2376 | /* get new child page */ | 2370 | /* get new child page */ |
2377 | XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); | 2371 | XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); |
2378 | if (rc) | 2372 | if (rc) |
2379 | return rc; | 2373 | return rc; |
2380 | 2374 | ||
2381 | BT_MARK_DIRTY(mp, ip); | 2375 | BT_MARK_DIRTY(mp, ip); |
2382 | if (!test_cflag(COMMIT_Nolink, ip)) { | 2376 | if (!test_cflag(COMMIT_Nolink, ip)) { |
2383 | tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); | 2377 | tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); |
2384 | xtlck = (struct xtlock *) & tlck->lock; | 2378 | xtlck = (struct xtlock *) & tlck->lock; |
2385 | } | 2379 | } |
2386 | } | 2380 | } |
2387 | } else { | 2381 | } else { |
2388 | /* if insert into middle, shift right remaining entries */ | 2382 | /* if insert into middle, shift right remaining entries */ |
2389 | if (newindex < nextindex) | 2383 | if (newindex < nextindex) |
2390 | memmove(&p->xad[newindex + 1], &p->xad[newindex], | 2384 | memmove(&p->xad[newindex + 1], &p->xad[newindex], |
2391 | (nextindex - newindex) << L2XTSLOTSIZE); | 2385 | (nextindex - newindex) << L2XTSLOTSIZE); |
2392 | 2386 | ||
2393 | /* insert the entry */ | 2387 | /* insert the entry */ |
2394 | xad = &p->xad[newindex]; | 2388 | xad = &p->xad[newindex]; |
2395 | XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); | 2389 | XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); |
2396 | 2390 | ||
2397 | /* advance next available entry index. */ | 2391 | /* advance next available entry index. */ |
2398 | p->header.nextindex = | 2392 | p->header.nextindex = |
2399 | cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); | 2393 | cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); |
2400 | } | 2394 | } |
2401 | 2395 | ||
2402 | out: | 2396 | out: |
2403 | if (!test_cflag(COMMIT_Nolink, ip)) { | 2397 | if (!test_cflag(COMMIT_Nolink, ip)) { |
2404 | xtlck->lwm.offset = (xtlck->lwm.offset) ? | 2398 | xtlck->lwm.offset = (xtlck->lwm.offset) ? |
2405 | min(index0, (int)xtlck->lwm.offset) : index0; | 2399 | min(index0, (int)xtlck->lwm.offset) : index0; |
2406 | xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - | 2400 | xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - |
2407 | xtlck->lwm.offset; | 2401 | xtlck->lwm.offset; |
2408 | } | 2402 | } |
2409 | 2403 | ||
2410 | /* unpin the leaf page */ | 2404 | /* unpin the leaf page */ |
2411 | XT_PUTPAGE(mp); | 2405 | XT_PUTPAGE(mp); |
2412 | 2406 | ||
2413 | return rc; | 2407 | return rc; |
2414 | } | 2408 | } |
2415 | 2409 | ||
2416 | 2410 | ||
2417 | /* | 2411 | /* |
2418 | * xtAppend() | 2412 | * xtAppend() |
2419 | * | 2413 | * |
2420 | * function: grow in append mode from contiguous region specified ; | 2414 | * function: grow in append mode from contiguous region specified ; |
2421 | * | 2415 | * |
2422 | * parameter: | 2416 | * parameter: |
2423 | * tid - transaction id; | 2417 | * tid - transaction id; |
2424 | * ip - file object; | 2418 | * ip - file object; |
2425 | * xflag - extent flag: | 2419 | * xflag - extent flag: |
2426 | * xoff - extent offset; | 2420 | * xoff - extent offset; |
2427 | * maxblocks - max extent length; | 2421 | * maxblocks - max extent length; |
2428 | * xlen - extent length (in/out); | 2422 | * xlen - extent length (in/out); |
2429 | * xaddrp - extent address pointer (in/out): | 2423 | * xaddrp - extent address pointer (in/out): |
2430 | * flag - | 2424 | * flag - |
2431 | * | 2425 | * |
2432 | * return: | 2426 | * return: |
2433 | */ | 2427 | */ |
2434 | int xtAppend(tid_t tid, /* transaction id */ | 2428 | int xtAppend(tid_t tid, /* transaction id */ |
2435 | struct inode *ip, int xflag, s64 xoff, s32 maxblocks, | 2429 | struct inode *ip, int xflag, s64 xoff, s32 maxblocks, |
2436 | s32 * xlenp, /* (in/out) */ | 2430 | s32 * xlenp, /* (in/out) */ |
2437 | s64 * xaddrp, /* (in/out) */ | 2431 | s64 * xaddrp, /* (in/out) */ |
2438 | int flag) | 2432 | int flag) |
2439 | { | 2433 | { |
2440 | int rc = 0; | 2434 | int rc = 0; |
2441 | struct metapage *mp; /* meta-page buffer */ | 2435 | struct metapage *mp; /* meta-page buffer */ |
2442 | xtpage_t *p; /* base B+-tree index page */ | 2436 | xtpage_t *p; /* base B+-tree index page */ |
2443 | s64 bn, xaddr; | 2437 | s64 bn, xaddr; |
2444 | int index, nextindex; | 2438 | int index, nextindex; |
2445 | struct btstack btstack; /* traverse stack */ | 2439 | struct btstack btstack; /* traverse stack */ |
2446 | struct xtsplit split; /* split information */ | 2440 | struct xtsplit split; /* split information */ |
2447 | xad_t *xad; | 2441 | xad_t *xad; |
2448 | int cmp; | 2442 | int cmp; |
2449 | struct tlock *tlck; | 2443 | struct tlock *tlck; |
2450 | struct xtlock *xtlck; | 2444 | struct xtlock *xtlck; |
2451 | int nsplit, nblocks, xlen; | 2445 | int nsplit, nblocks, xlen; |
2452 | struct pxdlist pxdlist; | 2446 | struct pxdlist pxdlist; |
2453 | pxd_t *pxd; | 2447 | pxd_t *pxd; |
2454 | s64 next; | 2448 | s64 next; |
2455 | 2449 | ||
2456 | xaddr = *xaddrp; | 2450 | xaddr = *xaddrp; |
2457 | xlen = *xlenp; | 2451 | xlen = *xlenp; |
2458 | jfs_info("xtAppend: xoff:0x%lx maxblocks:%d xlen:%d xaddr:0x%lx", | 2452 | jfs_info("xtAppend: xoff:0x%lx maxblocks:%d xlen:%d xaddr:0x%lx", |
2459 | (ulong) xoff, maxblocks, xlen, (ulong) xaddr); | 2453 | (ulong) xoff, maxblocks, xlen, (ulong) xaddr); |
2460 | 2454 | ||
2461 | /* | 2455 | /* |
2462 | * search for the entry location at which to insert: | 2456 | * search for the entry location at which to insert: |
2463 | * | 2457 | * |
2464 | * xtFastSearch() and xtSearch() both returns (leaf page | 2458 | * xtFastSearch() and xtSearch() both returns (leaf page |
2465 | * pinned, index at which to insert). | 2459 | * pinned, index at which to insert). |
2466 | * n.b. xtSearch() may return index of maxentry of | 2460 | * n.b. xtSearch() may return index of maxentry of |
2467 | * the full page. | 2461 | * the full page. |
2468 | */ | 2462 | */ |
2469 | if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT))) | 2463 | if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT))) |
2470 | return rc; | 2464 | return rc; |
2471 | 2465 | ||
2472 | /* retrieve search result */ | 2466 | /* retrieve search result */ |
2473 | XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); | 2467 | XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); |
2474 | 2468 | ||
2475 | if (cmp == 0) { | 2469 | if (cmp == 0) { |
2476 | rc = -EEXIST; | 2470 | rc = -EEXIST; |
2477 | goto out; | 2471 | goto out; |
2478 | } | 2472 | } |
2479 | 2473 | ||
2480 | if (next) | 2474 | if (next) |
2481 | xlen = min(xlen, (int)(next - xoff)); | 2475 | xlen = min(xlen, (int)(next - xoff)); |
2482 | //insert: | 2476 | //insert: |
2483 | /* | 2477 | /* |
2484 | * insert entry for new extent | 2478 | * insert entry for new extent |
2485 | */ | 2479 | */ |
2486 | xflag |= XAD_NEW; | 2480 | xflag |= XAD_NEW; |
2487 | 2481 | ||
2488 | /* | 2482 | /* |
2489 | * if the leaf page is full, split the page and | 2483 | * if the leaf page is full, split the page and |
2490 | * propagate up the router entry for the new page from split | 2484 | * propagate up the router entry for the new page from split |
2491 | * | 2485 | * |
2492 | * The xtSplitUp() will insert the entry and unpin the leaf page. | 2486 | * The xtSplitUp() will insert the entry and unpin the leaf page. |
2493 | */ | 2487 | */ |
2494 | nextindex = le16_to_cpu(p->header.nextindex); | 2488 | nextindex = le16_to_cpu(p->header.nextindex); |
2495 | if (nextindex < le16_to_cpu(p->header.maxentry)) | 2489 | if (nextindex < le16_to_cpu(p->header.maxentry)) |
2496 | goto insertLeaf; | 2490 | goto insertLeaf; |
2497 | 2491 | ||
2498 | /* | 2492 | /* |
2499 | * allocate new index blocks to cover index page split(s) | 2493 | * allocate new index blocks to cover index page split(s) |
2500 | */ | 2494 | */ |
2501 | nsplit = btstack.nsplit; | 2495 | nsplit = btstack.nsplit; |
2502 | split.pxdlist = &pxdlist; | 2496 | split.pxdlist = &pxdlist; |
2503 | pxdlist.maxnpxd = pxdlist.npxd = 0; | 2497 | pxdlist.maxnpxd = pxdlist.npxd = 0; |
2504 | pxd = &pxdlist.pxd[0]; | 2498 | pxd = &pxdlist.pxd[0]; |
2505 | nblocks = JFS_SBI(ip->i_sb)->nbperpage; | 2499 | nblocks = JFS_SBI(ip->i_sb)->nbperpage; |
2506 | for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) { | 2500 | for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) { |
2507 | if ((rc = dbAllocBottomUp(ip, xaddr, (s64) nblocks)) == 0) { | 2501 | if ((rc = dbAllocBottomUp(ip, xaddr, (s64) nblocks)) == 0) { |
2508 | PXDaddress(pxd, xaddr); | 2502 | PXDaddress(pxd, xaddr); |
2509 | PXDlength(pxd, nblocks); | 2503 | PXDlength(pxd, nblocks); |
2510 | 2504 | ||
2511 | pxdlist.maxnpxd++; | 2505 | pxdlist.maxnpxd++; |
2512 | 2506 | ||
2513 | continue; | 2507 | continue; |
2514 | } | 2508 | } |
2515 | 2509 | ||
2516 | /* undo allocation */ | 2510 | /* undo allocation */ |
2517 | 2511 | ||
2518 | goto out; | 2512 | goto out; |
2519 | } | 2513 | } |
2520 | 2514 | ||
2521 | xlen = min(xlen, maxblocks); | 2515 | xlen = min(xlen, maxblocks); |
2522 | 2516 | ||
2523 | /* | 2517 | /* |
2524 | * allocate data extent requested | 2518 | * allocate data extent requested |
2525 | */ | 2519 | */ |
2526 | if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen))) | 2520 | if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen))) |
2527 | goto out; | 2521 | goto out; |
2528 | 2522 | ||
2529 | split.mp = mp; | 2523 | split.mp = mp; |
2530 | split.index = index; | 2524 | split.index = index; |
2531 | split.flag = xflag; | 2525 | split.flag = xflag; |
2532 | split.off = xoff; | 2526 | split.off = xoff; |
2533 | split.len = xlen; | 2527 | split.len = xlen; |
2534 | split.addr = xaddr; | 2528 | split.addr = xaddr; |
2535 | if ((rc = xtSplitUp(tid, ip, &split, &btstack))) { | 2529 | if ((rc = xtSplitUp(tid, ip, &split, &btstack))) { |
2536 | /* undo data extent allocation */ | 2530 | /* undo data extent allocation */ |
2537 | dbFree(ip, *xaddrp, (s64) * xlenp); | 2531 | dbFree(ip, *xaddrp, (s64) * xlenp); |
2538 | 2532 | ||
2539 | return rc; | 2533 | return rc; |
2540 | } | 2534 | } |
2541 | 2535 | ||
2542 | *xaddrp = xaddr; | 2536 | *xaddrp = xaddr; |
2543 | *xlenp = xlen; | 2537 | *xlenp = xlen; |
2544 | return 0; | 2538 | return 0; |
2545 | 2539 | ||
2546 | /* | 2540 | /* |
2547 | * insert the new entry into the leaf page | 2541 | * insert the new entry into the leaf page |
2548 | */ | 2542 | */ |
2549 | insertLeaf: | 2543 | insertLeaf: |
2550 | /* | 2544 | /* |
2551 | * allocate data extent requested | 2545 | * allocate data extent requested |
2552 | */ | 2546 | */ |
2553 | if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen))) | 2547 | if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen))) |
2554 | goto out; | 2548 | goto out; |
2555 | 2549 | ||
2556 | BT_MARK_DIRTY(mp, ip); | 2550 | BT_MARK_DIRTY(mp, ip); |
2557 | /* | 2551 | /* |
2558 | * acquire a transaction lock on the leaf page; | 2552 | * acquire a transaction lock on the leaf page; |
2559 | * | 2553 | * |
2560 | * action: xad insertion/extension; | 2554 | * action: xad insertion/extension; |
2561 | */ | 2555 | */ |
2562 | tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); | 2556 | tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); |
2563 | xtlck = (struct xtlock *) & tlck->lock; | 2557 | xtlck = (struct xtlock *) & tlck->lock; |
2564 | 2558 | ||
2565 | /* insert the new entry: mark the entry NEW */ | 2559 | /* insert the new entry: mark the entry NEW */ |
2566 | xad = &p->xad[index]; | 2560 | xad = &p->xad[index]; |
2567 | XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); | 2561 | XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); |
2568 | 2562 | ||
2569 | /* advance next available entry index */ | 2563 | /* advance next available entry index */ |
2570 | p->header.nextindex = | 2564 | le16_add_cpu(&p->header.nextindex, 1); |
2571 | cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); | ||
2572 | 2565 | ||
2573 | xtlck->lwm.offset = | 2566 | xtlck->lwm.offset = |
2574 | (xtlck->lwm.offset) ? min(index,(int) xtlck->lwm.offset) : index; | 2567 | (xtlck->lwm.offset) ? min(index,(int) xtlck->lwm.offset) : index; |
2575 | xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - | 2568 | xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - |
2576 | xtlck->lwm.offset; | 2569 | xtlck->lwm.offset; |
2577 | 2570 | ||
2578 | *xaddrp = xaddr; | 2571 | *xaddrp = xaddr; |
2579 | *xlenp = xlen; | 2572 | *xlenp = xlen; |
2580 | 2573 | ||
2581 | out: | 2574 | out: |
2582 | /* unpin the leaf page */ | 2575 | /* unpin the leaf page */ |
2583 | XT_PUTPAGE(mp); | 2576 | XT_PUTPAGE(mp); |
2584 | 2577 | ||
2585 | return rc; | 2578 | return rc; |
2586 | } | 2579 | } |
2587 | #ifdef _STILL_TO_PORT | 2580 | #ifdef _STILL_TO_PORT |
2588 | 2581 | ||
2589 | /* - TBD for defragmentaion/reorganization - | 2582 | /* - TBD for defragmentaion/reorganization - |
2590 | * | 2583 | * |
2591 | * xtDelete() | 2584 | * xtDelete() |
2592 | * | 2585 | * |
2593 | * function: | 2586 | * function: |
2594 | * delete the entry with the specified key. | 2587 | * delete the entry with the specified key. |
2595 | * | 2588 | * |
2596 | * N.B.: whole extent of the entry is assumed to be deleted. | 2589 | * N.B.: whole extent of the entry is assumed to be deleted. |
2597 | * | 2590 | * |
2598 | * parameter: | 2591 | * parameter: |
2599 | * | 2592 | * |
2600 | * return: | 2593 | * return: |
2601 | * ENOENT: if the entry is not found. | 2594 | * ENOENT: if the entry is not found. |
2602 | * | 2595 | * |
2603 | * exception: | 2596 | * exception: |
2604 | */ | 2597 | */ |
2605 | int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag) | 2598 | int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag) |
2606 | { | 2599 | { |
2607 | int rc = 0; | 2600 | int rc = 0; |
2608 | struct btstack btstack; | 2601 | struct btstack btstack; |
2609 | int cmp; | 2602 | int cmp; |
2610 | s64 bn; | 2603 | s64 bn; |
2611 | struct metapage *mp; | 2604 | struct metapage *mp; |
2612 | xtpage_t *p; | 2605 | xtpage_t *p; |
2613 | int index, nextindex; | 2606 | int index, nextindex; |
2614 | struct tlock *tlck; | 2607 | struct tlock *tlck; |
2615 | struct xtlock *xtlck; | 2608 | struct xtlock *xtlck; |
2616 | 2609 | ||
2617 | /* | 2610 | /* |
2618 | * find the matching entry; xtSearch() pins the page | 2611 | * find the matching entry; xtSearch() pins the page |
2619 | */ | 2612 | */ |
2620 | if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0))) | 2613 | if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0))) |
2621 | return rc; | 2614 | return rc; |
2622 | 2615 | ||
2623 | XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); | 2616 | XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); |
2624 | if (cmp) { | 2617 | if (cmp) { |
2625 | /* unpin the leaf page */ | 2618 | /* unpin the leaf page */ |
2626 | XT_PUTPAGE(mp); | 2619 | XT_PUTPAGE(mp); |
2627 | return -ENOENT; | 2620 | return -ENOENT; |
2628 | } | 2621 | } |
2629 | 2622 | ||
2630 | /* | 2623 | /* |
2631 | * delete the entry from the leaf page | 2624 | * delete the entry from the leaf page |
2632 | */ | 2625 | */ |
2633 | nextindex = le16_to_cpu(p->header.nextindex); | 2626 | nextindex = le16_to_cpu(p->header.nextindex); |
2634 | p->header.nextindex = | 2627 | le16_add_cpu(&p->header.nextindex, -1); |
2635 | cpu_to_le16(le16_to_cpu(p->header.nextindex) - 1); | ||
2636 | 2628 | ||
2637 | /* | 2629 | /* |
2638 | * if the leaf page bocome empty, free the page | 2630 | * if the leaf page bocome empty, free the page |
2639 | */ | 2631 | */ |
2640 | if (p->header.nextindex == cpu_to_le16(XTENTRYSTART)) | 2632 | if (p->header.nextindex == cpu_to_le16(XTENTRYSTART)) |
2641 | return (xtDeleteUp(tid, ip, mp, p, &btstack)); | 2633 | return (xtDeleteUp(tid, ip, mp, p, &btstack)); |
2642 | 2634 | ||
2643 | BT_MARK_DIRTY(mp, ip); | 2635 | BT_MARK_DIRTY(mp, ip); |
2644 | /* | 2636 | /* |
2645 | * acquire a transaction lock on the leaf page; | 2637 | * acquire a transaction lock on the leaf page; |
2646 | * | 2638 | * |
2647 | * action:xad deletion; | 2639 | * action:xad deletion; |
2648 | */ | 2640 | */ |
2649 | tlck = txLock(tid, ip, mp, tlckXTREE); | 2641 | tlck = txLock(tid, ip, mp, tlckXTREE); |
2650 | xtlck = (struct xtlock *) & tlck->lock; | 2642 | xtlck = (struct xtlock *) & tlck->lock; |
2651 | xtlck->lwm.offset = | 2643 | xtlck->lwm.offset = |
2652 | (xtlck->lwm.offset) ? min(index, xtlck->lwm.offset) : index; | 2644 | (xtlck->lwm.offset) ? min(index, xtlck->lwm.offset) : index; |
2653 | 2645 | ||
2654 | /* if delete from middle, shift left/compact the remaining entries */ | 2646 | /* if delete from middle, shift left/compact the remaining entries */ |
2655 | if (index < nextindex - 1) | 2647 | if (index < nextindex - 1) |
2656 | memmove(&p->xad[index], &p->xad[index + 1], | 2648 | memmove(&p->xad[index], &p->xad[index + 1], |
2657 | (nextindex - index - 1) * sizeof(xad_t)); | 2649 | (nextindex - index - 1) * sizeof(xad_t)); |
2658 | 2650 | ||
2659 | XT_PUTPAGE(mp); | 2651 | XT_PUTPAGE(mp); |
2660 | 2652 | ||
2661 | return 0; | 2653 | return 0; |
2662 | } | 2654 | } |
2663 | 2655 | ||
2664 | 2656 | ||
2665 | /* - TBD for defragmentaion/reorganization - | 2657 | /* - TBD for defragmentaion/reorganization - |
2666 | * | 2658 | * |
2667 | * xtDeleteUp() | 2659 | * xtDeleteUp() |
2668 | * | 2660 | * |
2669 | * function: | 2661 | * function: |
2670 | * free empty pages as propagating deletion up the tree | 2662 | * free empty pages as propagating deletion up the tree |
2671 | * | 2663 | * |
2672 | * parameter: | 2664 | * parameter: |
2673 | * | 2665 | * |
2674 | * return: | 2666 | * return: |
2675 | */ | 2667 | */ |
2676 | static int | 2668 | static int |
2677 | xtDeleteUp(tid_t tid, struct inode *ip, | 2669 | xtDeleteUp(tid_t tid, struct inode *ip, |
2678 | struct metapage * fmp, xtpage_t * fp, struct btstack * btstack) | 2670 | struct metapage * fmp, xtpage_t * fp, struct btstack * btstack) |
2679 | { | 2671 | { |
2680 | int rc = 0; | 2672 | int rc = 0; |
2681 | struct metapage *mp; | 2673 | struct metapage *mp; |
2682 | xtpage_t *p; | 2674 | xtpage_t *p; |
2683 | int index, nextindex; | 2675 | int index, nextindex; |
2684 | s64 xaddr; | 2676 | s64 xaddr; |
2685 | int xlen; | 2677 | int xlen; |
2686 | struct btframe *parent; | 2678 | struct btframe *parent; |
2687 | struct tlock *tlck; | 2679 | struct tlock *tlck; |
2688 | struct xtlock *xtlck; | 2680 | struct xtlock *xtlck; |
2689 | 2681 | ||
2690 | /* | 2682 | /* |
2691 | * keep root leaf page which has become empty | 2683 | * keep root leaf page which has become empty |
2692 | */ | 2684 | */ |
2693 | if (fp->header.flag & BT_ROOT) { | 2685 | if (fp->header.flag & BT_ROOT) { |
2694 | /* keep the root page */ | 2686 | /* keep the root page */ |
2695 | fp->header.flag &= ~BT_INTERNAL; | 2687 | fp->header.flag &= ~BT_INTERNAL; |
2696 | fp->header.flag |= BT_LEAF; | 2688 | fp->header.flag |= BT_LEAF; |
2697 | fp->header.nextindex = cpu_to_le16(XTENTRYSTART); | 2689 | fp->header.nextindex = cpu_to_le16(XTENTRYSTART); |
2698 | 2690 | ||
2699 | /* XT_PUTPAGE(fmp); */ | 2691 | /* XT_PUTPAGE(fmp); */ |
2700 | 2692 | ||
2701 | return 0; | 2693 | return 0; |
2702 | } | 2694 | } |
2703 | 2695 | ||
2704 | /* | 2696 | /* |
2705 | * free non-root leaf page | 2697 | * free non-root leaf page |
2706 | */ | 2698 | */ |
2707 | if ((rc = xtRelink(tid, ip, fp))) { | 2699 | if ((rc = xtRelink(tid, ip, fp))) { |
2708 | XT_PUTPAGE(fmp); | 2700 | XT_PUTPAGE(fmp); |
2709 | return rc; | 2701 | return rc; |
2710 | } | 2702 | } |
2711 | 2703 | ||
2712 | xaddr = addressPXD(&fp->header.self); | 2704 | xaddr = addressPXD(&fp->header.self); |
2713 | xlen = lengthPXD(&fp->header.self); | 2705 | xlen = lengthPXD(&fp->header.self); |
2714 | /* free the page extent */ | 2706 | /* free the page extent */ |
2715 | dbFree(ip, xaddr, (s64) xlen); | 2707 | dbFree(ip, xaddr, (s64) xlen); |
2716 | 2708 | ||
2717 | /* free the buffer page */ | 2709 | /* free the buffer page */ |
2718 | discard_metapage(fmp); | 2710 | discard_metapage(fmp); |
2719 | 2711 | ||
2720 | /* | 2712 | /* |
2721 | * propagate page deletion up the index tree | 2713 | * propagate page deletion up the index tree |
2722 | * | 2714 | * |
2723 | * If the delete from the parent page makes it empty, | 2715 | * If the delete from the parent page makes it empty, |
2724 | * continue all the way up the tree. | 2716 | * continue all the way up the tree. |
2725 | * stop if the root page is reached (which is never deleted) or | 2717 | * stop if the root page is reached (which is never deleted) or |
2726 | * if the entry deletion does not empty the page. | 2718 | * if the entry deletion does not empty the page. |
2727 | */ | 2719 | */ |
2728 | while ((parent = BT_POP(btstack)) != NULL) { | 2720 | while ((parent = BT_POP(btstack)) != NULL) { |
2729 | /* get/pin the parent page <sp> */ | 2721 | /* get/pin the parent page <sp> */ |
2730 | XT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc); | 2722 | XT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc); |
2731 | if (rc) | 2723 | if (rc) |
2732 | return rc; | 2724 | return rc; |
2733 | 2725 | ||
2734 | index = parent->index; | 2726 | index = parent->index; |
2735 | 2727 | ||
2736 | /* delete the entry for the freed child page from parent. | 2728 | /* delete the entry for the freed child page from parent. |
2737 | */ | 2729 | */ |
2738 | nextindex = le16_to_cpu(p->header.nextindex); | 2730 | nextindex = le16_to_cpu(p->header.nextindex); |
2739 | 2731 | ||
2740 | /* | 2732 | /* |
2741 | * the parent has the single entry being deleted: | 2733 | * the parent has the single entry being deleted: |
2742 | * free the parent page which has become empty. | 2734 | * free the parent page which has become empty. |
2743 | */ | 2735 | */ |
2744 | if (nextindex == 1) { | 2736 | if (nextindex == 1) { |
2745 | if (p->header.flag & BT_ROOT) { | 2737 | if (p->header.flag & BT_ROOT) { |
2746 | /* keep the root page */ | 2738 | /* keep the root page */ |
2747 | p->header.flag &= ~BT_INTERNAL; | 2739 | p->header.flag &= ~BT_INTERNAL; |
2748 | p->header.flag |= BT_LEAF; | 2740 | p->header.flag |= BT_LEAF; |
2749 | p->header.nextindex = | 2741 | p->header.nextindex = |
2750 | cpu_to_le16(XTENTRYSTART); | 2742 | cpu_to_le16(XTENTRYSTART); |
2751 | 2743 | ||
2752 | /* XT_PUTPAGE(mp); */ | 2744 | /* XT_PUTPAGE(mp); */ |
2753 | 2745 | ||
2754 | break; | 2746 | break; |
2755 | } else { | 2747 | } else { |
2756 | /* free the parent page */ | 2748 | /* free the parent page */ |
2757 | if ((rc = xtRelink(tid, ip, p))) | 2749 | if ((rc = xtRelink(tid, ip, p))) |
2758 | return rc; | 2750 | return rc; |
2759 | 2751 | ||
2760 | xaddr = addressPXD(&p->header.self); | 2752 | xaddr = addressPXD(&p->header.self); |
2761 | /* free the page extent */ | 2753 | /* free the page extent */ |
2762 | dbFree(ip, xaddr, | 2754 | dbFree(ip, xaddr, |
2763 | (s64) JFS_SBI(ip->i_sb)->nbperpage); | 2755 | (s64) JFS_SBI(ip->i_sb)->nbperpage); |
2764 | 2756 | ||
2765 | /* unpin/free the buffer page */ | 2757 | /* unpin/free the buffer page */ |
2766 | discard_metapage(mp); | 2758 | discard_metapage(mp); |
2767 | 2759 | ||
2768 | /* propagate up */ | 2760 | /* propagate up */ |
2769 | continue; | 2761 | continue; |
2770 | } | 2762 | } |
2771 | } | 2763 | } |
2772 | /* | 2764 | /* |
2773 | * the parent has other entries remaining: | 2765 | * the parent has other entries remaining: |
2774 | * delete the router entry from the parent page. | 2766 | * delete the router entry from the parent page. |
2775 | */ | 2767 | */ |
2776 | else { | 2768 | else { |
2777 | BT_MARK_DIRTY(mp, ip); | 2769 | BT_MARK_DIRTY(mp, ip); |
2778 | /* | 2770 | /* |
2779 | * acquire a transaction lock on the leaf page; | 2771 | * acquire a transaction lock on the leaf page; |
2780 | * | 2772 | * |
2781 | * action:xad deletion; | 2773 | * action:xad deletion; |
2782 | */ | 2774 | */ |
2783 | tlck = txLock(tid, ip, mp, tlckXTREE); | 2775 | tlck = txLock(tid, ip, mp, tlckXTREE); |
2784 | xtlck = (struct xtlock *) & tlck->lock; | 2776 | xtlck = (struct xtlock *) & tlck->lock; |
2785 | xtlck->lwm.offset = | 2777 | xtlck->lwm.offset = |
2786 | (xtlck->lwm.offset) ? min(index, | 2778 | (xtlck->lwm.offset) ? min(index, |
2787 | xtlck->lwm. | 2779 | xtlck->lwm. |
2788 | offset) : index; | 2780 | offset) : index; |
2789 | 2781 | ||
2790 | /* if delete from middle, | 2782 | /* if delete from middle, |
2791 | * shift left/compact the remaining entries in the page | 2783 | * shift left/compact the remaining entries in the page |
2792 | */ | 2784 | */ |
2793 | if (index < nextindex - 1) | 2785 | if (index < nextindex - 1) |
2794 | memmove(&p->xad[index], &p->xad[index + 1], | 2786 | memmove(&p->xad[index], &p->xad[index + 1], |
2795 | (nextindex - index - | 2787 | (nextindex - index - |
2796 | 1) << L2XTSLOTSIZE); | 2788 | 1) << L2XTSLOTSIZE); |
2797 | 2789 | ||
2798 | p->header.nextindex = | 2790 | le16_add_cpu(&p->header.nextindex, -1); |
2799 | cpu_to_le16(le16_to_cpu(p->header.nextindex) - | ||
2800 | 1); | ||
2801 | jfs_info("xtDeleteUp(entry): 0x%lx[%d]", | 2791 | jfs_info("xtDeleteUp(entry): 0x%lx[%d]", |
2802 | (ulong) parent->bn, index); | 2792 | (ulong) parent->bn, index); |
2803 | } | 2793 | } |
2804 | 2794 | ||
2805 | /* unpin the parent page */ | 2795 | /* unpin the parent page */ |
2806 | XT_PUTPAGE(mp); | 2796 | XT_PUTPAGE(mp); |
2807 | 2797 | ||
2808 | /* exit propagation up */ | 2798 | /* exit propagation up */ |
2809 | break; | 2799 | break; |
2810 | } | 2800 | } |
2811 | 2801 | ||
2812 | return 0; | 2802 | return 0; |
2813 | } | 2803 | } |
2814 | 2804 | ||
2815 | 2805 | ||
2816 | /* | 2806 | /* |
2817 | * NAME: xtRelocate() | 2807 | * NAME: xtRelocate() |
2818 | * | 2808 | * |
2819 | * FUNCTION: relocate xtpage or data extent of regular file; | 2809 | * FUNCTION: relocate xtpage or data extent of regular file; |
2820 | * This function is mainly used by defragfs utility. | 2810 | * This function is mainly used by defragfs utility. |
2821 | * | 2811 | * |
2822 | * NOTE: This routine does not have the logic to handle | 2812 | * NOTE: This routine does not have the logic to handle |
2823 | * uncommitted allocated extent. The caller should call | 2813 | * uncommitted allocated extent. The caller should call |
2824 | * txCommit() to commit all the allocation before call | 2814 | * txCommit() to commit all the allocation before call |
2825 | * this routine. | 2815 | * this routine. |
2826 | */ | 2816 | */ |
2827 | int | 2817 | int |
2828 | xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ | 2818 | xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ |
2829 | s64 nxaddr, /* new xaddr */ | 2819 | s64 nxaddr, /* new xaddr */ |
2830 | int xtype) | 2820 | int xtype) |
2831 | { /* extent type: XTPAGE or DATAEXT */ | 2821 | { /* extent type: XTPAGE or DATAEXT */ |
2832 | int rc = 0; | 2822 | int rc = 0; |
2833 | struct tblock *tblk; | 2823 | struct tblock *tblk; |
2834 | struct tlock *tlck; | 2824 | struct tlock *tlck; |
2835 | struct xtlock *xtlck; | 2825 | struct xtlock *xtlck; |
2836 | struct metapage *mp, *pmp, *lmp, *rmp; /* meta-page buffer */ | 2826 | struct metapage *mp, *pmp, *lmp, *rmp; /* meta-page buffer */ |
2837 | xtpage_t *p, *pp, *rp, *lp; /* base B+-tree index page */ | 2827 | xtpage_t *p, *pp, *rp, *lp; /* base B+-tree index page */ |
2838 | xad_t *xad; | 2828 | xad_t *xad; |
2839 | pxd_t *pxd; | 2829 | pxd_t *pxd; |
2840 | s64 xoff, xsize; | 2830 | s64 xoff, xsize; |
2841 | int xlen; | 2831 | int xlen; |
2842 | s64 oxaddr, sxaddr, dxaddr, nextbn, prevbn; | 2832 | s64 oxaddr, sxaddr, dxaddr, nextbn, prevbn; |
2843 | cbuf_t *cp; | 2833 | cbuf_t *cp; |
2844 | s64 offset, nbytes, nbrd, pno; | 2834 | s64 offset, nbytes, nbrd, pno; |
2845 | int nb, npages, nblks; | 2835 | int nb, npages, nblks; |
2846 | s64 bn; | 2836 | s64 bn; |
2847 | int cmp; | 2837 | int cmp; |
2848 | int index; | 2838 | int index; |
2849 | struct pxd_lock *pxdlock; | 2839 | struct pxd_lock *pxdlock; |
2850 | struct btstack btstack; /* traverse stack */ | 2840 | struct btstack btstack; /* traverse stack */ |
2851 | 2841 | ||
2852 | xtype = xtype & EXTENT_TYPE; | 2842 | xtype = xtype & EXTENT_TYPE; |
2853 | 2843 | ||
2854 | xoff = offsetXAD(oxad); | 2844 | xoff = offsetXAD(oxad); |
2855 | oxaddr = addressXAD(oxad); | 2845 | oxaddr = addressXAD(oxad); |
2856 | xlen = lengthXAD(oxad); | 2846 | xlen = lengthXAD(oxad); |
2857 | 2847 | ||
2858 | /* validate extent offset */ | 2848 | /* validate extent offset */ |
2859 | offset = xoff << JFS_SBI(ip->i_sb)->l2bsize; | 2849 | offset = xoff << JFS_SBI(ip->i_sb)->l2bsize; |
2860 | if (offset >= ip->i_size) | 2850 | if (offset >= ip->i_size) |
2861 | return -ESTALE; /* stale extent */ | 2851 | return -ESTALE; /* stale extent */ |
2862 | 2852 | ||
2863 | jfs_info("xtRelocate: xtype:%d xoff:0x%lx xlen:0x%x xaddr:0x%lx:0x%lx", | 2853 | jfs_info("xtRelocate: xtype:%d xoff:0x%lx xlen:0x%x xaddr:0x%lx:0x%lx", |
2864 | xtype, (ulong) xoff, xlen, (ulong) oxaddr, (ulong) nxaddr); | 2854 | xtype, (ulong) xoff, xlen, (ulong) oxaddr, (ulong) nxaddr); |
2865 | 2855 | ||
2866 | /* | 2856 | /* |
2867 | * 1. get and validate the parent xtpage/xad entry | 2857 | * 1. get and validate the parent xtpage/xad entry |
2868 | * covering the source extent to be relocated; | 2858 | * covering the source extent to be relocated; |
2869 | */ | 2859 | */ |
2870 | if (xtype == DATAEXT) { | 2860 | if (xtype == DATAEXT) { |
2871 | /* search in leaf entry */ | 2861 | /* search in leaf entry */ |
2872 | rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0); | 2862 | rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0); |
2873 | if (rc) | 2863 | if (rc) |
2874 | return rc; | 2864 | return rc; |
2875 | 2865 | ||
2876 | /* retrieve search result */ | 2866 | /* retrieve search result */ |
2877 | XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); | 2867 | XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); |
2878 | 2868 | ||
2879 | if (cmp) { | 2869 | if (cmp) { |
2880 | XT_PUTPAGE(pmp); | 2870 | XT_PUTPAGE(pmp); |
2881 | return -ESTALE; | 2871 | return -ESTALE; |
2882 | } | 2872 | } |
2883 | 2873 | ||
2884 | /* validate for exact match with a single entry */ | 2874 | /* validate for exact match with a single entry */ |
2885 | xad = &pp->xad[index]; | 2875 | xad = &pp->xad[index]; |
2886 | if (addressXAD(xad) != oxaddr || lengthXAD(xad) != xlen) { | 2876 | if (addressXAD(xad) != oxaddr || lengthXAD(xad) != xlen) { |
2887 | XT_PUTPAGE(pmp); | 2877 | XT_PUTPAGE(pmp); |
2888 | return -ESTALE; | 2878 | return -ESTALE; |
2889 | } | 2879 | } |
2890 | } else { /* (xtype == XTPAGE) */ | 2880 | } else { /* (xtype == XTPAGE) */ |
2891 | 2881 | ||
2892 | /* search in internal entry */ | 2882 | /* search in internal entry */ |
2893 | rc = xtSearchNode(ip, oxad, &cmp, &btstack, 0); | 2883 | rc = xtSearchNode(ip, oxad, &cmp, &btstack, 0); |
2894 | if (rc) | 2884 | if (rc) |
2895 | return rc; | 2885 | return rc; |
2896 | 2886 | ||
2897 | /* retrieve search result */ | 2887 | /* retrieve search result */ |
2898 | XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); | 2888 | XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); |
2899 | 2889 | ||
2900 | if (cmp) { | 2890 | if (cmp) { |
2901 | XT_PUTPAGE(pmp); | 2891 | XT_PUTPAGE(pmp); |
2902 | return -ESTALE; | 2892 | return -ESTALE; |
2903 | } | 2893 | } |
2904 | 2894 | ||
2905 | /* xtSearchNode() validated for exact match with a single entry | 2895 | /* xtSearchNode() validated for exact match with a single entry |
2906 | */ | 2896 | */ |
2907 | xad = &pp->xad[index]; | 2897 | xad = &pp->xad[index]; |
2908 | } | 2898 | } |
2909 | jfs_info("xtRelocate: parent xad entry validated."); | 2899 | jfs_info("xtRelocate: parent xad entry validated."); |
2910 | 2900 | ||
2911 | /* | 2901 | /* |
2912 | * 2. relocate the extent | 2902 | * 2. relocate the extent |
2913 | */ | 2903 | */ |
2914 | if (xtype == DATAEXT) { | 2904 | if (xtype == DATAEXT) { |
2915 | /* if the extent is allocated-but-not-recorded | 2905 | /* if the extent is allocated-but-not-recorded |
2916 | * there is no real data to be moved in this extent, | 2906 | * there is no real data to be moved in this extent, |
2917 | */ | 2907 | */ |
2918 | if (xad->flag & XAD_NOTRECORDED) | 2908 | if (xad->flag & XAD_NOTRECORDED) |
2919 | goto out; | 2909 | goto out; |
2920 | else | 2910 | else |
2921 | /* release xtpage for cmRead()/xtLookup() */ | 2911 | /* release xtpage for cmRead()/xtLookup() */ |
2922 | XT_PUTPAGE(pmp); | 2912 | XT_PUTPAGE(pmp); |
2923 | 2913 | ||
2924 | /* | 2914 | /* |
2925 | * cmRelocate() | 2915 | * cmRelocate() |
2926 | * | 2916 | * |
2927 | * copy target data pages to be relocated; | 2917 | * copy target data pages to be relocated; |
2928 | * | 2918 | * |
2929 | * data extent must start at page boundary and | 2919 | * data extent must start at page boundary and |
2930 | * multiple of page size (except the last data extent); | 2920 | * multiple of page size (except the last data extent); |
2931 | * read in each page of the source data extent into cbuf, | 2921 | * read in each page of the source data extent into cbuf, |
2932 | * update the cbuf extent descriptor of the page to be | 2922 | * update the cbuf extent descriptor of the page to be |
2933 | * homeward bound to new dst data extent | 2923 | * homeward bound to new dst data extent |
2934 | * copy the data from the old extent to new extent. | 2924 | * copy the data from the old extent to new extent. |
2935 | * copy is essential for compressed files to avoid problems | 2925 | * copy is essential for compressed files to avoid problems |
2936 | * that can arise if there was a change in compression | 2926 | * that can arise if there was a change in compression |
2937 | * algorithms. | 2927 | * algorithms. |
2938 | * it is a good strategy because it may disrupt cache | 2928 | * it is a good strategy because it may disrupt cache |
2939 | * policy to keep the pages in memory afterwards. | 2929 | * policy to keep the pages in memory afterwards. |
2940 | */ | 2930 | */ |
2941 | offset = xoff << JFS_SBI(ip->i_sb)->l2bsize; | 2931 | offset = xoff << JFS_SBI(ip->i_sb)->l2bsize; |
2942 | assert((offset & CM_OFFSET) == 0); | 2932 | assert((offset & CM_OFFSET) == 0); |
2943 | nbytes = xlen << JFS_SBI(ip->i_sb)->l2bsize; | 2933 | nbytes = xlen << JFS_SBI(ip->i_sb)->l2bsize; |
2944 | pno = offset >> CM_L2BSIZE; | 2934 | pno = offset >> CM_L2BSIZE; |
2945 | npages = (nbytes + (CM_BSIZE - 1)) >> CM_L2BSIZE; | 2935 | npages = (nbytes + (CM_BSIZE - 1)) >> CM_L2BSIZE; |
2946 | /* | 2936 | /* |
2947 | npages = ((offset + nbytes - 1) >> CM_L2BSIZE) - | 2937 | npages = ((offset + nbytes - 1) >> CM_L2BSIZE) - |
2948 | (offset >> CM_L2BSIZE) + 1; | 2938 | (offset >> CM_L2BSIZE) + 1; |
2949 | */ | 2939 | */ |
2950 | sxaddr = oxaddr; | 2940 | sxaddr = oxaddr; |
2951 | dxaddr = nxaddr; | 2941 | dxaddr = nxaddr; |
2952 | 2942 | ||
2953 | /* process the request one cache buffer at a time */ | 2943 | /* process the request one cache buffer at a time */ |
2954 | for (nbrd = 0; nbrd < nbytes; nbrd += nb, | 2944 | for (nbrd = 0; nbrd < nbytes; nbrd += nb, |
2955 | offset += nb, pno++, npages--) { | 2945 | offset += nb, pno++, npages--) { |
2956 | /* compute page size */ | 2946 | /* compute page size */ |
2957 | nb = min(nbytes - nbrd, CM_BSIZE); | 2947 | nb = min(nbytes - nbrd, CM_BSIZE); |
2958 | 2948 | ||
2959 | /* get the cache buffer of the page */ | 2949 | /* get the cache buffer of the page */ |
2960 | if (rc = cmRead(ip, offset, npages, &cp)) | 2950 | if (rc = cmRead(ip, offset, npages, &cp)) |
2961 | break; | 2951 | break; |
2962 | 2952 | ||
2963 | assert(addressPXD(&cp->cm_pxd) == sxaddr); | 2953 | assert(addressPXD(&cp->cm_pxd) == sxaddr); |
2964 | assert(!cp->cm_modified); | 2954 | assert(!cp->cm_modified); |
2965 | 2955 | ||
2966 | /* bind buffer with the new extent address */ | 2956 | /* bind buffer with the new extent address */ |
2967 | nblks = nb >> JFS_IP(ip->i_sb)->l2bsize; | 2957 | nblks = nb >> JFS_IP(ip->i_sb)->l2bsize; |
2968 | cmSetXD(ip, cp, pno, dxaddr, nblks); | 2958 | cmSetXD(ip, cp, pno, dxaddr, nblks); |
2969 | 2959 | ||
2970 | /* release the cbuf, mark it as modified */ | 2960 | /* release the cbuf, mark it as modified */ |
2971 | cmPut(cp, true); | 2961 | cmPut(cp, true); |
2972 | 2962 | ||
2973 | dxaddr += nblks; | 2963 | dxaddr += nblks; |
2974 | sxaddr += nblks; | 2964 | sxaddr += nblks; |
2975 | } | 2965 | } |
2976 | 2966 | ||
2977 | /* get back parent page */ | 2967 | /* get back parent page */ |
2978 | if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0))) | 2968 | if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0))) |
2979 | return rc; | 2969 | return rc; |
2980 | 2970 | ||
2981 | XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); | 2971 | XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); |
2982 | jfs_info("xtRelocate: target data extent relocated."); | 2972 | jfs_info("xtRelocate: target data extent relocated."); |
2983 | } else { /* (xtype == XTPAGE) */ | 2973 | } else { /* (xtype == XTPAGE) */ |
2984 | 2974 | ||
2985 | /* | 2975 | /* |
2986 | * read in the target xtpage from the source extent; | 2976 | * read in the target xtpage from the source extent; |
2987 | */ | 2977 | */ |
2988 | XT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc); | 2978 | XT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc); |
2989 | if (rc) { | 2979 | if (rc) { |
2990 | XT_PUTPAGE(pmp); | 2980 | XT_PUTPAGE(pmp); |
2991 | return rc; | 2981 | return rc; |
2992 | } | 2982 | } |
2993 | 2983 | ||
2994 | /* | 2984 | /* |
2995 | * read in sibling pages if any to update sibling pointers; | 2985 | * read in sibling pages if any to update sibling pointers; |
2996 | */ | 2986 | */ |
2997 | rmp = NULL; | 2987 | rmp = NULL; |
2998 | if (p->header.next) { | 2988 | if (p->header.next) { |
2999 | nextbn = le64_to_cpu(p->header.next); | 2989 | nextbn = le64_to_cpu(p->header.next); |
3000 | XT_GETPAGE(ip, nextbn, rmp, PSIZE, rp, rc); | 2990 | XT_GETPAGE(ip, nextbn, rmp, PSIZE, rp, rc); |
3001 | if (rc) { | 2991 | if (rc) { |
3002 | XT_PUTPAGE(pmp); | 2992 | XT_PUTPAGE(pmp); |
3003 | XT_PUTPAGE(mp); | 2993 | XT_PUTPAGE(mp); |
3004 | return (rc); | 2994 | return (rc); |
3005 | } | 2995 | } |
3006 | } | 2996 | } |
3007 | 2997 | ||
3008 | lmp = NULL; | 2998 | lmp = NULL; |
3009 | if (p->header.prev) { | 2999 | if (p->header.prev) { |
3010 | prevbn = le64_to_cpu(p->header.prev); | 3000 | prevbn = le64_to_cpu(p->header.prev); |
3011 | XT_GETPAGE(ip, prevbn, lmp, PSIZE, lp, rc); | 3001 | XT_GETPAGE(ip, prevbn, lmp, PSIZE, lp, rc); |
3012 | if (rc) { | 3002 | if (rc) { |
3013 | XT_PUTPAGE(pmp); | 3003 | XT_PUTPAGE(pmp); |
3014 | XT_PUTPAGE(mp); | 3004 | XT_PUTPAGE(mp); |
3015 | if (rmp) | 3005 | if (rmp) |
3016 | XT_PUTPAGE(rmp); | 3006 | XT_PUTPAGE(rmp); |
3017 | return (rc); | 3007 | return (rc); |
3018 | } | 3008 | } |
3019 | } | 3009 | } |
3020 | 3010 | ||
3021 | /* at this point, all xtpages to be updated are in memory */ | 3011 | /* at this point, all xtpages to be updated are in memory */ |
3022 | 3012 | ||
3023 | /* | 3013 | /* |
3024 | * update sibling pointers of sibling xtpages if any; | 3014 | * update sibling pointers of sibling xtpages if any; |
3025 | */ | 3015 | */ |
3026 | if (lmp) { | 3016 | if (lmp) { |
3027 | BT_MARK_DIRTY(lmp, ip); | 3017 | BT_MARK_DIRTY(lmp, ip); |
3028 | tlck = txLock(tid, ip, lmp, tlckXTREE | tlckRELINK); | 3018 | tlck = txLock(tid, ip, lmp, tlckXTREE | tlckRELINK); |
3029 | lp->header.next = cpu_to_le64(nxaddr); | 3019 | lp->header.next = cpu_to_le64(nxaddr); |
3030 | XT_PUTPAGE(lmp); | 3020 | XT_PUTPAGE(lmp); |
3031 | } | 3021 | } |
3032 | 3022 | ||
3033 | if (rmp) { | 3023 | if (rmp) { |
3034 | BT_MARK_DIRTY(rmp, ip); | 3024 | BT_MARK_DIRTY(rmp, ip); |
3035 | tlck = txLock(tid, ip, rmp, tlckXTREE | tlckRELINK); | 3025 | tlck = txLock(tid, ip, rmp, tlckXTREE | tlckRELINK); |
3036 | rp->header.prev = cpu_to_le64(nxaddr); | 3026 | rp->header.prev = cpu_to_le64(nxaddr); |
3037 | XT_PUTPAGE(rmp); | 3027 | XT_PUTPAGE(rmp); |
3038 | } | 3028 | } |
3039 | 3029 | ||
3040 | /* | 3030 | /* |
3041 | * update the target xtpage to be relocated | 3031 | * update the target xtpage to be relocated |
3042 | * | 3032 | * |
3043 | * update the self address of the target page | 3033 | * update the self address of the target page |
3044 | * and write to destination extent; | 3034 | * and write to destination extent; |
3045 | * redo image covers the whole xtpage since it is new page | 3035 | * redo image covers the whole xtpage since it is new page |
3046 | * to the destination extent; | 3036 | * to the destination extent; |
3047 | * update of bmap for the free of source extent | 3037 | * update of bmap for the free of source extent |
3048 | * of the target xtpage itself: | 3038 | * of the target xtpage itself: |
3049 | * update of bmap for the allocation of destination extent | 3039 | * update of bmap for the allocation of destination extent |
3050 | * of the target xtpage itself: | 3040 | * of the target xtpage itself: |
3051 | * update of bmap for the extents covered by xad entries in | 3041 | * update of bmap for the extents covered by xad entries in |
3052 | * the target xtpage is not necessary since they are not | 3042 | * the target xtpage is not necessary since they are not |
3053 | * updated; | 3043 | * updated; |
3054 | * if not committed before this relocation, | 3044 | * if not committed before this relocation, |
3055 | * target page may contain XAD_NEW entries which must | 3045 | * target page may contain XAD_NEW entries which must |
3056 | * be scanned for bmap update (logredo() always | 3046 | * be scanned for bmap update (logredo() always |
3057 | * scan xtpage REDOPAGE image for bmap update); | 3047 | * scan xtpage REDOPAGE image for bmap update); |
3058 | * if committed before this relocation (tlckRELOCATE), | 3048 | * if committed before this relocation (tlckRELOCATE), |
3059 | * scan may be skipped by commit() and logredo(); | 3049 | * scan may be skipped by commit() and logredo(); |
3060 | */ | 3050 | */ |
3061 | BT_MARK_DIRTY(mp, ip); | 3051 | BT_MARK_DIRTY(mp, ip); |
3062 | /* tlckNEW init xtlck->lwm.offset = XTENTRYSTART; */ | 3052 | /* tlckNEW init xtlck->lwm.offset = XTENTRYSTART; */ |
3063 | tlck = txLock(tid, ip, mp, tlckXTREE | tlckNEW); | 3053 | tlck = txLock(tid, ip, mp, tlckXTREE | tlckNEW); |
3064 | xtlck = (struct xtlock *) & tlck->lock; | 3054 | xtlck = (struct xtlock *) & tlck->lock; |
3065 | 3055 | ||
3066 | /* update the self address in the xtpage header */ | 3056 | /* update the self address in the xtpage header */ |
3067 | pxd = &p->header.self; | 3057 | pxd = &p->header.self; |
3068 | PXDaddress(pxd, nxaddr); | 3058 | PXDaddress(pxd, nxaddr); |
3069 | 3059 | ||
3070 | /* linelock for the after image of the whole page */ | 3060 | /* linelock for the after image of the whole page */ |
3071 | xtlck->lwm.length = | 3061 | xtlck->lwm.length = |
3072 | le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset; | 3062 | le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset; |
3073 | 3063 | ||
3074 | /* update the buffer extent descriptor of target xtpage */ | 3064 | /* update the buffer extent descriptor of target xtpage */ |
3075 | xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize; | 3065 | xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize; |
3076 | bmSetXD(mp, nxaddr, xsize); | 3066 | bmSetXD(mp, nxaddr, xsize); |
3077 | 3067 | ||
3078 | /* unpin the target page to new homeward bound */ | 3068 | /* unpin the target page to new homeward bound */ |
3079 | XT_PUTPAGE(mp); | 3069 | XT_PUTPAGE(mp); |
3080 | jfs_info("xtRelocate: target xtpage relocated."); | 3070 | jfs_info("xtRelocate: target xtpage relocated."); |
3081 | } | 3071 | } |
3082 | 3072 | ||
3083 | /* | 3073 | /* |
3084 | * 3. acquire maplock for the source extent to be freed; | 3074 | * 3. acquire maplock for the source extent to be freed; |
3085 | * | 3075 | * |
3086 | * acquire a maplock saving the src relocated extent address; | 3076 | * acquire a maplock saving the src relocated extent address; |
3087 | * to free of the extent at commit time; | 3077 | * to free of the extent at commit time; |
3088 | */ | 3078 | */ |
3089 | out: | 3079 | out: |
3090 | /* if DATAEXT relocation, write a LOG_UPDATEMAP record for | 3080 | /* if DATAEXT relocation, write a LOG_UPDATEMAP record for |
3091 | * free PXD of the source data extent (logredo() will update | 3081 | * free PXD of the source data extent (logredo() will update |
3092 | * bmap for free of source data extent), and update bmap for | 3082 | * bmap for free of source data extent), and update bmap for |
3093 | * free of the source data extent; | 3083 | * free of the source data extent; |
3094 | */ | 3084 | */ |
3095 | if (xtype == DATAEXT) | 3085 | if (xtype == DATAEXT) |
3096 | tlck = txMaplock(tid, ip, tlckMAP); | 3086 | tlck = txMaplock(tid, ip, tlckMAP); |
3097 | /* if XTPAGE relocation, write a LOG_NOREDOPAGE record | 3087 | /* if XTPAGE relocation, write a LOG_NOREDOPAGE record |
3098 | * for the source xtpage (logredo() will init NoRedoPage | 3088 | * for the source xtpage (logredo() will init NoRedoPage |
3099 | * filter and will also update bmap for free of the source | 3089 | * filter and will also update bmap for free of the source |
3100 | * xtpage), and update bmap for free of the source xtpage; | 3090 | * xtpage), and update bmap for free of the source xtpage; |
3101 | * N.B. We use tlckMAP instead of tlkcXTREE because there | 3091 | * N.B. We use tlckMAP instead of tlkcXTREE because there |
3102 | * is no buffer associated with this lock since the buffer | 3092 | * is no buffer associated with this lock since the buffer |
3103 | * has been redirected to the target location. | 3093 | * has been redirected to the target location. |
3104 | */ | 3094 | */ |
3105 | else /* (xtype == XTPAGE) */ | 3095 | else /* (xtype == XTPAGE) */ |
3106 | tlck = txMaplock(tid, ip, tlckMAP | tlckRELOCATE); | 3096 | tlck = txMaplock(tid, ip, tlckMAP | tlckRELOCATE); |
3107 | 3097 | ||
3108 | pxdlock = (struct pxd_lock *) & tlck->lock; | 3098 | pxdlock = (struct pxd_lock *) & tlck->lock; |
3109 | pxdlock->flag = mlckFREEPXD; | 3099 | pxdlock->flag = mlckFREEPXD; |
3110 | PXDaddress(&pxdlock->pxd, oxaddr); | 3100 | PXDaddress(&pxdlock->pxd, oxaddr); |
3111 | PXDlength(&pxdlock->pxd, xlen); | 3101 | PXDlength(&pxdlock->pxd, xlen); |
3112 | pxdlock->index = 1; | 3102 | pxdlock->index = 1; |
3113 | 3103 | ||
3114 | /* | 3104 | /* |
3115 | * 4. update the parent xad entry for relocation; | 3105 | * 4. update the parent xad entry for relocation; |
3116 | * | 3106 | * |
3117 | * acquire tlck for the parent entry with XAD_NEW as entry | 3107 | * acquire tlck for the parent entry with XAD_NEW as entry |
3118 | * update which will write LOG_REDOPAGE and update bmap for | 3108 | * update which will write LOG_REDOPAGE and update bmap for |
3119 | * allocation of XAD_NEW destination extent; | 3109 | * allocation of XAD_NEW destination extent; |
3120 | */ | 3110 | */ |
3121 | jfs_info("xtRelocate: update parent xad entry."); | 3111 | jfs_info("xtRelocate: update parent xad entry."); |
3122 | BT_MARK_DIRTY(pmp, ip); | 3112 | BT_MARK_DIRTY(pmp, ip); |
3123 | tlck = txLock(tid, ip, pmp, tlckXTREE | tlckGROW); | 3113 | tlck = txLock(tid, ip, pmp, tlckXTREE | tlckGROW); |
3124 | xtlck = (struct xtlock *) & tlck->lock; | 3114 | xtlck = (struct xtlock *) & tlck->lock; |
3125 | 3115 | ||
3126 | /* update the XAD with the new destination extent; */ | 3116 | /* update the XAD with the new destination extent; */ |
3127 | xad = &pp->xad[index]; | 3117 | xad = &pp->xad[index]; |
3128 | xad->flag |= XAD_NEW; | 3118 | xad->flag |= XAD_NEW; |
3129 | XADaddress(xad, nxaddr); | 3119 | XADaddress(xad, nxaddr); |
3130 | 3120 | ||
3131 | xtlck->lwm.offset = min(index, xtlck->lwm.offset); | 3121 | xtlck->lwm.offset = min(index, xtlck->lwm.offset); |
3132 | xtlck->lwm.length = le16_to_cpu(pp->header.nextindex) - | 3122 | xtlck->lwm.length = le16_to_cpu(pp->header.nextindex) - |
3133 | xtlck->lwm.offset; | 3123 | xtlck->lwm.offset; |
3134 | 3124 | ||
3135 | /* unpin the parent xtpage */ | 3125 | /* unpin the parent xtpage */ |
3136 | XT_PUTPAGE(pmp); | 3126 | XT_PUTPAGE(pmp); |
3137 | 3127 | ||
3138 | return rc; | 3128 | return rc; |
3139 | } | 3129 | } |
3140 | 3130 | ||
3141 | 3131 | ||
3142 | /* | 3132 | /* |
3143 | * xtSearchNode() | 3133 | * xtSearchNode() |
3144 | * | 3134 | * |
3145 | * function: search for the internal xad entry covering specified extent. | 3135 | * function: search for the internal xad entry covering specified extent. |
3146 | * This function is mainly used by defragfs utility. | 3136 | * This function is mainly used by defragfs utility. |
3147 | * | 3137 | * |
3148 | * parameters: | 3138 | * parameters: |
3149 | * ip - file object; | 3139 | * ip - file object; |
3150 | * xad - extent to find; | 3140 | * xad - extent to find; |
3151 | * cmpp - comparison result: | 3141 | * cmpp - comparison result: |
3152 | * btstack - traverse stack; | 3142 | * btstack - traverse stack; |
3153 | * flag - search process flag; | 3143 | * flag - search process flag; |
3154 | * | 3144 | * |
3155 | * returns: | 3145 | * returns: |
3156 | * btstack contains (bn, index) of search path traversed to the entry. | 3146 | * btstack contains (bn, index) of search path traversed to the entry. |
3157 | * *cmpp is set to result of comparison with the entry returned. | 3147 | * *cmpp is set to result of comparison with the entry returned. |
3158 | * the page containing the entry is pinned at exit. | 3148 | * the page containing the entry is pinned at exit. |
3159 | */ | 3149 | */ |
3160 | static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */ | 3150 | static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */ |
3161 | int *cmpp, struct btstack * btstack, int flag) | 3151 | int *cmpp, struct btstack * btstack, int flag) |
3162 | { | 3152 | { |
3163 | int rc = 0; | 3153 | int rc = 0; |
3164 | s64 xoff, xaddr; | 3154 | s64 xoff, xaddr; |
3165 | int xlen; | 3155 | int xlen; |
3166 | int cmp = 1; /* init for empty page */ | 3156 | int cmp = 1; /* init for empty page */ |
3167 | s64 bn; /* block number */ | 3157 | s64 bn; /* block number */ |
3168 | struct metapage *mp; /* meta-page buffer */ | 3158 | struct metapage *mp; /* meta-page buffer */ |
3169 | xtpage_t *p; /* page */ | 3159 | xtpage_t *p; /* page */ |
3170 | int base, index, lim; | 3160 | int base, index, lim; |
3171 | struct btframe *btsp; | 3161 | struct btframe *btsp; |
3172 | s64 t64; | 3162 | s64 t64; |
3173 | 3163 | ||
3174 | BT_CLR(btstack); | 3164 | BT_CLR(btstack); |
3175 | 3165 | ||
3176 | xoff = offsetXAD(xad); | 3166 | xoff = offsetXAD(xad); |
3177 | xlen = lengthXAD(xad); | 3167 | xlen = lengthXAD(xad); |
3178 | xaddr = addressXAD(xad); | 3168 | xaddr = addressXAD(xad); |
3179 | 3169 | ||
3180 | /* | 3170 | /* |
3181 | * search down tree from root: | 3171 | * search down tree from root: |
3182 | * | 3172 | * |
3183 | * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of | 3173 | * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of |
3184 | * internal page, child page Pi contains entry with k, Ki <= K < Kj. | 3174 | * internal page, child page Pi contains entry with k, Ki <= K < Kj. |
3185 | * | 3175 | * |
3186 | * if entry with search key K is not found | 3176 | * if entry with search key K is not found |
3187 | * internal page search find the entry with largest key Ki | 3177 | * internal page search find the entry with largest key Ki |
3188 | * less than K which point to the child page to search; | 3178 | * less than K which point to the child page to search; |
3189 | * leaf page search find the entry with smallest key Kj | 3179 | * leaf page search find the entry with smallest key Kj |
3190 | * greater than K so that the returned index is the position of | 3180 | * greater than K so that the returned index is the position of |
3191 | * the entry to be shifted right for insertion of new entry. | 3181 | * the entry to be shifted right for insertion of new entry. |
3192 | * for empty tree, search key is greater than any key of the tree. | 3182 | * for empty tree, search key is greater than any key of the tree. |
3193 | * | 3183 | * |
3194 | * by convention, root bn = 0. | 3184 | * by convention, root bn = 0. |
3195 | */ | 3185 | */ |
3196 | for (bn = 0;;) { | 3186 | for (bn = 0;;) { |
3197 | /* get/pin the page to search */ | 3187 | /* get/pin the page to search */ |
3198 | XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); | 3188 | XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); |
3199 | if (rc) | 3189 | if (rc) |
3200 | return rc; | 3190 | return rc; |
3201 | if (p->header.flag & BT_LEAF) { | 3191 | if (p->header.flag & BT_LEAF) { |
3202 | XT_PUTPAGE(mp); | 3192 | XT_PUTPAGE(mp); |
3203 | return -ESTALE; | 3193 | return -ESTALE; |
3204 | } | 3194 | } |
3205 | 3195 | ||
3206 | lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART; | 3196 | lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART; |
3207 | 3197 | ||
3208 | /* | 3198 | /* |
3209 | * binary search with search key K on the current page | 3199 | * binary search with search key K on the current page |
3210 | */ | 3200 | */ |
3211 | for (base = XTENTRYSTART; lim; lim >>= 1) { | 3201 | for (base = XTENTRYSTART; lim; lim >>= 1) { |
3212 | index = base + (lim >> 1); | 3202 | index = base + (lim >> 1); |
3213 | 3203 | ||
3214 | XT_CMP(cmp, xoff, &p->xad[index], t64); | 3204 | XT_CMP(cmp, xoff, &p->xad[index], t64); |
3215 | if (cmp == 0) { | 3205 | if (cmp == 0) { |
3216 | /* | 3206 | /* |
3217 | * search hit | 3207 | * search hit |
3218 | * | 3208 | * |
3219 | * verify for exact match; | 3209 | * verify for exact match; |
3220 | */ | 3210 | */ |
3221 | if (xaddr == addressXAD(&p->xad[index]) && | 3211 | if (xaddr == addressXAD(&p->xad[index]) && |
3222 | xoff == offsetXAD(&p->xad[index])) { | 3212 | xoff == offsetXAD(&p->xad[index])) { |
3223 | *cmpp = cmp; | 3213 | *cmpp = cmp; |
3224 | 3214 | ||
3225 | /* save search result */ | 3215 | /* save search result */ |
3226 | btsp = btstack->top; | 3216 | btsp = btstack->top; |
3227 | btsp->bn = bn; | 3217 | btsp->bn = bn; |
3228 | btsp->index = index; | 3218 | btsp->index = index; |
3229 | btsp->mp = mp; | 3219 | btsp->mp = mp; |
3230 | 3220 | ||
3231 | return 0; | 3221 | return 0; |
3232 | } | 3222 | } |
3233 | 3223 | ||
3234 | /* descend/search its child page */ | 3224 | /* descend/search its child page */ |
3235 | goto next; | 3225 | goto next; |
3236 | } | 3226 | } |
3237 | 3227 | ||
3238 | if (cmp > 0) { | 3228 | if (cmp > 0) { |
3239 | base = index + 1; | 3229 | base = index + 1; |
3240 | --lim; | 3230 | --lim; |
3241 | } | 3231 | } |
3242 | } | 3232 | } |
3243 | 3233 | ||
3244 | /* | 3234 | /* |
3245 | * search miss - non-leaf page: | 3235 | * search miss - non-leaf page: |
3246 | * | 3236 | * |
3247 | * base is the smallest index with key (Kj) greater than | 3237 | * base is the smallest index with key (Kj) greater than |
3248 | * search key (K) and may be zero or maxentry index. | 3238 | * search key (K) and may be zero or maxentry index. |
3249 | * if base is non-zero, decrement base by one to get the parent | 3239 | * if base is non-zero, decrement base by one to get the parent |
3250 | * entry of the child page to search. | 3240 | * entry of the child page to search. |
3251 | */ | 3241 | */ |
3252 | index = base ? base - 1 : base; | 3242 | index = base ? base - 1 : base; |
3253 | 3243 | ||
3254 | /* | 3244 | /* |
3255 | * go down to child page | 3245 | * go down to child page |
3256 | */ | 3246 | */ |
3257 | next: | 3247 | next: |
3258 | /* get the child page block number */ | 3248 | /* get the child page block number */ |
3259 | bn = addressXAD(&p->xad[index]); | 3249 | bn = addressXAD(&p->xad[index]); |
3260 | 3250 | ||
3261 | /* unpin the parent page */ | 3251 | /* unpin the parent page */ |
3262 | XT_PUTPAGE(mp); | 3252 | XT_PUTPAGE(mp); |
3263 | } | 3253 | } |
3264 | } | 3254 | } |
3265 | 3255 | ||
3266 | 3256 | ||
3267 | /* | 3257 | /* |
3268 | * xtRelink() | 3258 | * xtRelink() |
3269 | * | 3259 | * |
3270 | * function: | 3260 | * function: |
3271 | * link around a freed page. | 3261 | * link around a freed page. |
3272 | * | 3262 | * |
3273 | * Parameter: | 3263 | * Parameter: |
3274 | * int tid, | 3264 | * int tid, |
3275 | * struct inode *ip, | 3265 | * struct inode *ip, |
3276 | * xtpage_t *p) | 3266 | * xtpage_t *p) |
3277 | * | 3267 | * |
3278 | * returns: | 3268 | * returns: |
3279 | */ | 3269 | */ |
3280 | static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * p) | 3270 | static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * p) |
3281 | { | 3271 | { |
3282 | int rc = 0; | 3272 | int rc = 0; |
3283 | struct metapage *mp; | 3273 | struct metapage *mp; |
3284 | s64 nextbn, prevbn; | 3274 | s64 nextbn, prevbn; |
3285 | struct tlock *tlck; | 3275 | struct tlock *tlck; |
3286 | 3276 | ||
3287 | nextbn = le64_to_cpu(p->header.next); | 3277 | nextbn = le64_to_cpu(p->header.next); |
3288 | prevbn = le64_to_cpu(p->header.prev); | 3278 | prevbn = le64_to_cpu(p->header.prev); |
3289 | 3279 | ||
3290 | /* update prev pointer of the next page */ | 3280 | /* update prev pointer of the next page */ |
3291 | if (nextbn != 0) { | 3281 | if (nextbn != 0) { |
3292 | XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); | 3282 | XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); |
3293 | if (rc) | 3283 | if (rc) |
3294 | return rc; | 3284 | return rc; |
3295 | 3285 | ||
3296 | /* | 3286 | /* |
3297 | * acquire a transaction lock on the page; | 3287 | * acquire a transaction lock on the page; |
3298 | * | 3288 | * |
3299 | * action: update prev pointer; | 3289 | * action: update prev pointer; |
3300 | */ | 3290 | */ |
3301 | BT_MARK_DIRTY(mp, ip); | 3291 | BT_MARK_DIRTY(mp, ip); |
3302 | tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK); | 3292 | tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK); |
3303 | 3293 | ||
3304 | /* the page may already have been tlock'd */ | 3294 | /* the page may already have been tlock'd */ |
3305 | 3295 | ||
3306 | p->header.prev = cpu_to_le64(prevbn); | 3296 | p->header.prev = cpu_to_le64(prevbn); |
3307 | 3297 | ||
3308 | XT_PUTPAGE(mp); | 3298 | XT_PUTPAGE(mp); |
3309 | } | 3299 | } |
3310 | 3300 | ||
3311 | /* update next pointer of the previous page */ | 3301 | /* update next pointer of the previous page */ |
3312 | if (prevbn != 0) { | 3302 | if (prevbn != 0) { |
3313 | XT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc); | 3303 | XT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc); |
3314 | if (rc) | 3304 | if (rc) |
3315 | return rc; | 3305 | return rc; |
3316 | 3306 | ||
3317 | /* | 3307 | /* |
3318 | * acquire a transaction lock on the page; | 3308 | * acquire a transaction lock on the page; |
3319 | * | 3309 | * |
3320 | * action: update next pointer; | 3310 | * action: update next pointer; |
3321 | */ | 3311 | */ |
3322 | BT_MARK_DIRTY(mp, ip); | 3312 | BT_MARK_DIRTY(mp, ip); |
3323 | tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK); | 3313 | tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK); |
3324 | 3314 | ||
3325 | /* the page may already have been tlock'd */ | 3315 | /* the page may already have been tlock'd */ |
3326 | 3316 | ||
3327 | p->header.next = le64_to_cpu(nextbn); | 3317 | p->header.next = le64_to_cpu(nextbn); |
3328 | 3318 | ||
3329 | XT_PUTPAGE(mp); | 3319 | XT_PUTPAGE(mp); |
3330 | } | 3320 | } |
3331 | 3321 | ||
3332 | return 0; | 3322 | return 0; |
3333 | } | 3323 | } |
3334 | #endif /* _STILL_TO_PORT */ | 3324 | #endif /* _STILL_TO_PORT */ |
3335 | 3325 | ||
3336 | 3326 | ||
3337 | /* | 3327 | /* |
3338 | * xtInitRoot() | 3328 | * xtInitRoot() |
3339 | * | 3329 | * |
3340 | * initialize file root (inline in inode) | 3330 | * initialize file root (inline in inode) |
3341 | */ | 3331 | */ |
3342 | void xtInitRoot(tid_t tid, struct inode *ip) | 3332 | void xtInitRoot(tid_t tid, struct inode *ip) |
3343 | { | 3333 | { |
3344 | xtpage_t *p; | 3334 | xtpage_t *p; |
3345 | 3335 | ||
3346 | /* | 3336 | /* |
3347 | * acquire a transaction lock on the root | 3337 | * acquire a transaction lock on the root |
3348 | * | 3338 | * |
3349 | * action: | 3339 | * action: |
3350 | */ | 3340 | */ |
3351 | txLock(tid, ip, (struct metapage *) &JFS_IP(ip)->bxflag, | 3341 | txLock(tid, ip, (struct metapage *) &JFS_IP(ip)->bxflag, |
3352 | tlckXTREE | tlckNEW); | 3342 | tlckXTREE | tlckNEW); |
3353 | p = &JFS_IP(ip)->i_xtroot; | 3343 | p = &JFS_IP(ip)->i_xtroot; |
3354 | 3344 | ||
3355 | p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF; | 3345 | p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF; |
3356 | p->header.nextindex = cpu_to_le16(XTENTRYSTART); | 3346 | p->header.nextindex = cpu_to_le16(XTENTRYSTART); |
3357 | 3347 | ||
3358 | if (S_ISDIR(ip->i_mode)) | 3348 | if (S_ISDIR(ip->i_mode)) |
3359 | p->header.maxentry = cpu_to_le16(XTROOTINITSLOT_DIR); | 3349 | p->header.maxentry = cpu_to_le16(XTROOTINITSLOT_DIR); |
3360 | else { | 3350 | else { |
3361 | p->header.maxentry = cpu_to_le16(XTROOTINITSLOT); | 3351 | p->header.maxentry = cpu_to_le16(XTROOTINITSLOT); |
3362 | ip->i_size = 0; | 3352 | ip->i_size = 0; |
3363 | } | 3353 | } |
3364 | 3354 | ||
3365 | 3355 | ||
3366 | return; | 3356 | return; |
3367 | } | 3357 | } |
3368 | 3358 | ||
3369 | 3359 | ||
3370 | /* | 3360 | /* |
3371 | * We can run into a deadlock truncating a file with a large number of | 3361 | * We can run into a deadlock truncating a file with a large number of |
3372 | * xtree pages (large fragmented file). A robust fix would entail a | 3362 | * xtree pages (large fragmented file). A robust fix would entail a |
3373 | * reservation system where we would reserve a number of metadata pages | 3363 | * reservation system where we would reserve a number of metadata pages |
3374 | * and tlocks which we would be guaranteed without a deadlock. Without | 3364 | * and tlocks which we would be guaranteed without a deadlock. Without |
3375 | * this, a partial fix is to limit number of metadata pages we will lock | 3365 | * this, a partial fix is to limit number of metadata pages we will lock |
3376 | * in a single transaction. Currently we will truncate the file so that | 3366 | * in a single transaction. Currently we will truncate the file so that |
3377 | * no more than 50 leaf pages will be locked. The caller of xtTruncate | 3367 | * no more than 50 leaf pages will be locked. The caller of xtTruncate |
3378 | * will be responsible for ensuring that the current transaction gets | 3368 | * will be responsible for ensuring that the current transaction gets |
3379 | * committed, and that subsequent transactions are created to truncate | 3369 | * committed, and that subsequent transactions are created to truncate |
3380 | * the file further if needed. | 3370 | * the file further if needed. |
3381 | */ | 3371 | */ |
3382 | #define MAX_TRUNCATE_LEAVES 50 | 3372 | #define MAX_TRUNCATE_LEAVES 50 |
3383 | 3373 | ||
3384 | /* | 3374 | /* |
3385 | * xtTruncate() | 3375 | * xtTruncate() |
3386 | * | 3376 | * |
3387 | * function: | 3377 | * function: |
3388 | * traverse for truncation logging backward bottom up; | 3378 | * traverse for truncation logging backward bottom up; |
3389 | * terminate at the last extent entry at the current subtree | 3379 | * terminate at the last extent entry at the current subtree |
3390 | * root page covering new down size. | 3380 | * root page covering new down size. |
3391 | * truncation may occur within the last extent entry. | 3381 | * truncation may occur within the last extent entry. |
3392 | * | 3382 | * |
3393 | * parameter: | 3383 | * parameter: |
3394 | * int tid, | 3384 | * int tid, |
3395 | * struct inode *ip, | 3385 | * struct inode *ip, |
3396 | * s64 newsize, | 3386 | * s64 newsize, |
3397 | * int type) {PWMAP, PMAP, WMAP; DELETE, TRUNCATE} | 3387 | * int type) {PWMAP, PMAP, WMAP; DELETE, TRUNCATE} |
3398 | * | 3388 | * |
3399 | * return: | 3389 | * return: |
3400 | * | 3390 | * |
3401 | * note: | 3391 | * note: |
3402 | * PWMAP: | 3392 | * PWMAP: |
3403 | * 1. truncate (non-COMMIT_NOLINK file) | 3393 | * 1. truncate (non-COMMIT_NOLINK file) |
3404 | * by jfs_truncate() or jfs_open(O_TRUNC): | 3394 | * by jfs_truncate() or jfs_open(O_TRUNC): |
3405 | * xtree is updated; | 3395 | * xtree is updated; |
3406 | * 2. truncate index table of directory when last entry removed | 3396 | * 2. truncate index table of directory when last entry removed |
3407 | * map update via tlock at commit time; | 3397 | * map update via tlock at commit time; |
3408 | * PMAP: | 3398 | * PMAP: |
3409 | * Call xtTruncate_pmap instead | 3399 | * Call xtTruncate_pmap instead |
3410 | * WMAP: | 3400 | * WMAP: |
3411 | * 1. remove (free zero link count) on last reference release | 3401 | * 1. remove (free zero link count) on last reference release |
3412 | * (pmap has been freed at commit zero link count); | 3402 | * (pmap has been freed at commit zero link count); |
3413 | * 2. truncate (COMMIT_NOLINK file, i.e., tmp file): | 3403 | * 2. truncate (COMMIT_NOLINK file, i.e., tmp file): |
3414 | * xtree is updated; | 3404 | * xtree is updated; |
3415 | * map update directly at truncation time; | 3405 | * map update directly at truncation time; |
3416 | * | 3406 | * |
3417 | * if (DELETE) | 3407 | * if (DELETE) |
3418 | * no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient); | 3408 | * no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient); |
3419 | * else if (TRUNCATE) | 3409 | * else if (TRUNCATE) |
3420 | * must write LOG_NOREDOPAGE for deleted index page; | 3410 | * must write LOG_NOREDOPAGE for deleted index page; |
3421 | * | 3411 | * |
3422 | * pages may already have been tlocked by anonymous transactions | 3412 | * pages may already have been tlocked by anonymous transactions |
3423 | * during file growth (i.e., write) before truncation; | 3413 | * during file growth (i.e., write) before truncation; |
3424 | * | 3414 | * |
3425 | * except last truncated entry, deleted entries remains as is | 3415 | * except last truncated entry, deleted entries remains as is |
3426 | * in the page (nextindex is updated) for other use | 3416 | * in the page (nextindex is updated) for other use |
3427 | * (e.g., log/update allocation map): this avoid copying the page | 3417 | * (e.g., log/update allocation map): this avoid copying the page |
3428 | * info but delay free of pages; | 3418 | * info but delay free of pages; |
3429 | * | 3419 | * |
3430 | */ | 3420 | */ |
s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
{
	int rc = 0;
	s64 teof;		/* truncation point in fs blocks (rounded up) */
	struct metapage *mp;
	xtpage_t *p;
	s64 bn;
	int index, nextindex;
	xad_t *xad;
	s64 xoff, xaddr;
	int xlen, len, freexlen;
	struct btstack btstack;	/* stack of parent entries for the descent */
	struct btframe *parent;
	struct tblock *tblk = NULL;
	struct tlock *tlck = NULL;
	struct xtlock *xtlck = NULL;
	struct xdlistlock xadlock;	/* maplock for COMMIT_WMAP */
	struct pxd_lock *pxdlock;	/* maplock for COMMIT_WMAP */
	s64 nfreed;		/* total number of fs blocks freed */
	int freed, log;
	int locked_leaves = 0;	/* bounds tlocks held in one transaction */

	/* save object truncation type */
	if (tid) {
		tblk = tid_to_tblock(tid);
		tblk->xflag |= flag;
	}

	nfreed = 0;

	flag &= COMMIT_MAP;
	/* PMAP-only truncation is handled by xtTruncate_pmap(), not here */
	assert(flag != COMMIT_PMAP);

	if (flag == COMMIT_PWMAP)
		log = 1;
	else {
		log = 0;
		xadlock.flag = mlckFREEXADLIST;
		xadlock.index = 1;
	}

	/*
	 * if the newsize is not an integral number of pages,
	 * the file between newsize and next page boundary will
	 * be cleared.
	 * if truncating into a file hole, it will cause
	 * a full block to be allocated for the logical block.
	 */

	/*
	 * release page blocks of truncated region <teof, eof>
	 *
	 * free the data blocks from the leaf index blocks.
	 * delete the parent index entries corresponding to
	 * the freed child data/index blocks.
	 * free the index blocks themselves which aren't needed
	 * in new sized file.
	 *
	 * index blocks are updated only if the blocks are to be
	 * retained in the new sized file.
	 * if type is PMAP, the data and index pages are NOT
	 * freed, and the data and index blocks are NOT freed
	 * from working map.
	 * (this will allow continued access of data/index of
	 * temporary file (zerolink count file truncated to zero-length)).
	 */
	teof = (newsize + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
	    JFS_SBI(ip->i_sb)->l2bsize;

	/* clear stack */
	BT_CLR(&btstack);

	/*
	 * start with root
	 *
	 * root resides in the inode
	 */
	bn = 0;

	/*
	 * first access of each page:
	 */
      getPage:
	XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
	if (rc)
		return rc;

	/* process entries backward from last index */
	index = le16_to_cpu(p->header.nextindex) - 1;


	/* Since this is the rightmost page at this level, and we may have
	 * already freed a page that was formerly to the right, let's make
	 * sure that the next pointer is zero.
	 */
	if (p->header.next) {
		if (log)
			/*
			 * Make sure this change to the header is logged.
			 * If we really truncate this leaf, the flag
			 * will be changed to tlckTRUNCATE
			 */
			tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
		BT_MARK_DIRTY(mp, ip);
		p->header.next = 0;
	}

	if (p->header.flag & BT_INTERNAL)
		goto getChild;

	/*
	 *	leaf page
	 */
	freed = 0;

	/* does region covered by leaf page precede Teof ?
	 * (i.e., the whole leaf survives the truncation)
	 */
	xad = &p->xad[index];
	xoff = offsetXAD(xad);
	xlen = lengthXAD(xad);
	if (teof >= xoff + xlen) {
		XT_PUTPAGE(mp);
		goto getParent;
	}

	/* (re)acquire tlock of the leaf page */
	if (log) {
		if (++locked_leaves > MAX_TRUNCATE_LEAVES) {
			/*
			 * We need to limit the size of the transaction
			 * to avoid exhausting pagecache & tlocks
			 */
			XT_PUTPAGE(mp);
			/* stop here; caller may truncate the rest in a
			 * later transaction starting from this newsize
			 */
			newsize = (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
			goto getParent;
		}
		tlck = txLock(tid, ip, mp, tlckXTREE);
		tlck->type = tlckXTREE | tlckTRUNCATE;
		xtlck = (struct xtlock *) & tlck->lock;
		xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1;
	}
	BT_MARK_DIRTY(mp, ip);

	/*
	 * scan backward leaf page entries
	 */
	for (; index >= XTENTRYSTART; index--) {
		xad = &p->xad[index];
		xoff = offsetXAD(xad);
		xlen = lengthXAD(xad);
		xaddr = addressXAD(xad);

		/*
		 * The "data" for a directory is indexed by the block
		 * device's address space.  This metadata must be invalidated
		 * here
		 */
		if (S_ISDIR(ip->i_mode) && (teof == 0))
			invalidate_xad_metapages(ip, *xad);
		/*
		 * entry beyond eof: continue scan of current page
		 *          xad
		 * ---|---=======------->
		 *   eof
		 */
		if (teof < xoff) {
			nfreed += xlen;
			continue;
		}

		/*
		 * (xoff <= teof): last entry to be deleted from page;
		 * If other entries remain in page: keep and update the page.
		 */

		/*
		 * eof == entry_start: delete the entry
		 *           xad
		 * -------|=======------->
		 *       eof
		 *
		 */
		if (teof == xoff) {
			nfreed += xlen;

			/* page becomes entirely empty */
			if (index == XTENTRYSTART)
				break;

			nextindex = index;
		}
		/*
		 * eof within the entry: truncate the entry.
		 *          xad
		 * -------===|===------->
		 *          eof
		 */
		else if (teof < xoff + xlen) {
			/* update truncated entry */
			len = teof - xoff;
			freexlen = xlen - len;
			XADlength(xad, len);

			/* save pxd of truncated extent in tlck */
			xaddr += len;
			if (log) {	/* COMMIT_PWMAP */
				xtlck->lwm.offset = (xtlck->lwm.offset) ?
				    min(index, (int)xtlck->lwm.offset) : index;
				xtlck->lwm.length = index + 1 -
				    xtlck->lwm.offset;
				xtlck->twm.offset = index;
				pxdlock = (struct pxd_lock *) & xtlck->pxdlock;
				pxdlock->flag = mlckFREEPXD;
				PXDaddress(&pxdlock->pxd, xaddr);
				PXDlength(&pxdlock->pxd, freexlen);
			}
			/* free truncated extent */
			else {	/* COMMIT_WMAP */

				pxdlock = (struct pxd_lock *) & xadlock;
				pxdlock->flag = mlckFREEPXD;
				PXDaddress(&pxdlock->pxd, xaddr);
				PXDlength(&pxdlock->pxd, freexlen);
				txFreeMap(ip, pxdlock, NULL, COMMIT_WMAP);

				/* reset map lock */
				xadlock.flag = mlckFREEXADLIST;
			}

			/* current entry is new last entry; */
			nextindex = index + 1;

			nfreed += freexlen;
		}
		/*
		 * eof beyond the entry:
		 *      xad
		 * -------=======---|--->
		 *                 eof
		 */
		else {		/* (xoff + xlen < teof) */

			nextindex = index + 1;
		}

		if (nextindex < le16_to_cpu(p->header.nextindex)) {
			if (!log) {	/* COMMIT_WAMP */
				xadlock.xdlist = &p->xad[nextindex];
				xadlock.count =
				    le16_to_cpu(p->header.nextindex) -
				    nextindex;
				txFreeMap(ip, (struct maplock *) & xadlock,
					  NULL, COMMIT_WMAP);
			}
			/* deleted entries remain in place; only nextindex
			 * is pulled back (see function header comment)
			 */
			p->header.nextindex = cpu_to_le16(nextindex);
		}

		XT_PUTPAGE(mp);

		/* assert(freed == 0); */
		goto getParent;
	}			/* end scan of leaf page entries */

	freed = 1;

	/*
	 * leaf page become empty: free the page if type != PMAP
	 */
	if (log) {		/* COMMIT_PWMAP */
		/* txCommit() with tlckFREE:
		 * free data extents covered by leaf [XTENTRYSTART:hwm);
		 * invalidate leaf if COMMIT_PWMAP;
		 * if (TRUNCATE), will write LOG_NOREDOPAGE;
		 */
		tlck->type = tlckXTREE | tlckFREE;
	} else {		/* COMMIT_WAMP */

		/* free data extents covered by leaf */
		xadlock.xdlist = &p->xad[XTENTRYSTART];
		xadlock.count =
		    le16_to_cpu(p->header.nextindex) - XTENTRYSTART;
		txFreeMap(ip, (struct maplock *) & xadlock, NULL, COMMIT_WMAP);
	}

	if (p->header.flag & BT_ROOT) {
		/* emptied root reverts to an empty leaf in the inode */
		p->header.flag &= ~BT_INTERNAL;
		p->header.flag |= BT_LEAF;
		p->header.nextindex = cpu_to_le16(XTENTRYSTART);

		XT_PUTPAGE(mp);	/* debug */
		goto out;
	} else {
		if (log) {	/* COMMIT_PWMAP */
			/* page will be invalidated at tx completion
			 */
			XT_PUTPAGE(mp);
		} else {	/* COMMIT_WMAP */

			if (mp->lid)
				lid_to_tlock(mp->lid)->flag |= tlckFREELOCK;

			/* invalidate empty leaf page */
			discard_metapage(mp);
		}
	}

	/*
	 * the leaf page become empty: delete the parent entry
	 * for the leaf page if the parent page is to be kept
	 * in the new sized file.
	 */

	/*
	 * go back up to the parent page
	 */
      getParent:
	/* pop/restore parent entry for the current child page */
	if ((parent = BT_POP(&btstack)) == NULL)
		/* current page must have been root */
		goto out;

	/* get back the parent page */
	bn = parent->bn;
	XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
	if (rc)
		return rc;

	index = parent->index;

	/*
	 * child page was not empty:
	 */
	if (freed == 0) {
		/* has any entry deleted from parent ? */
		if (index < le16_to_cpu(p->header.nextindex) - 1) {
			/* (re)acquire tlock on the parent page */
			if (log) {	/* COMMIT_PWMAP */
				/* txCommit() with tlckTRUNCATE:
				 * free child extents covered by parent [);
				 */
				tlck = txLock(tid, ip, mp, tlckXTREE);
				xtlck = (struct xtlock *) & tlck->lock;
				if (!(tlck->type & tlckTRUNCATE)) {
					xtlck->hwm.offset =
					    le16_to_cpu(p->header.
							nextindex) - 1;
					tlck->type =
					    tlckXTREE | tlckTRUNCATE;
				}
			} else {	/* COMMIT_WMAP */

				/* free child extents covered by parent */
				xadlock.xdlist = &p->xad[index + 1];
				xadlock.count =
				    le16_to_cpu(p->header.nextindex) -
				    index - 1;
				txFreeMap(ip, (struct maplock *) & xadlock,
					  NULL, COMMIT_WMAP);
			}
			BT_MARK_DIRTY(mp, ip);

			p->header.nextindex = cpu_to_le16(index + 1);
		}
		XT_PUTPAGE(mp);
		goto getParent;
	}

	/*
	 * child page was empty:
	 */
	nfreed += lengthXAD(&p->xad[index]);

	/*
	 * During working map update, child page's tlock must be handled
	 * before parent's.  This is because the parent's tlock will cause
	 * the child's disk space to be marked available in the wmap, so
	 * it's important that the child page be released by that time.
	 *
	 * ToDo:  tlocks should be on doubly-linked list, so we can
	 * quickly remove it and add it to the end.
	 */

	/*
	 * Move parent page's tlock to the end of the tid's tlock list
	 */
	if (log && mp->lid && (tblk->last != mp->lid) &&
	    lid_to_tlock(mp->lid)->tid) {
		lid_t lid = mp->lid;
		struct tlock *prev;

		tlck = lid_to_tlock(lid);

		/* unlink lid from the singly-linked tlock list, then
		 * re-append it at the tail
		 */
		if (tblk->next == lid)
			tblk->next = tlck->next;
		else {
			for (prev = lid_to_tlock(tblk->next);
			     prev->next != lid;
			     prev = lid_to_tlock(prev->next)) {
				assert(prev->next);
			}
			prev->next = tlck->next;
		}
		lid_to_tlock(tblk->last)->next = lid;
		tlck->next = 0;
		tblk->last = lid;
	}

	/*
	 * parent page become empty: free the page
	 */
	if (index == XTENTRYSTART) {
		if (log) {	/* COMMIT_PWMAP */
			/* txCommit() with tlckFREE:
			 * free child extents covered by parent;
			 * invalidate parent if COMMIT_PWMAP;
			 */
			tlck = txLock(tid, ip, mp, tlckXTREE);
			xtlck = (struct xtlock *) & tlck->lock;
			xtlck->hwm.offset =
			    le16_to_cpu(p->header.nextindex) - 1;
			tlck->type = tlckXTREE | tlckFREE;
		} else {	/* COMMIT_WMAP */

			/* free child extents covered by parent */
			xadlock.xdlist = &p->xad[XTENTRYSTART];
			xadlock.count =
			    le16_to_cpu(p->header.nextindex) -
			    XTENTRYSTART;
			txFreeMap(ip, (struct maplock *) & xadlock, NULL,
				  COMMIT_WMAP);
		}
		BT_MARK_DIRTY(mp, ip);

		if (p->header.flag & BT_ROOT) {
			p->header.flag &= ~BT_INTERNAL;
			p->header.flag |= BT_LEAF;
			p->header.nextindex = cpu_to_le16(XTENTRYSTART);
			if (le16_to_cpu(p->header.maxentry) == XTROOTMAXSLOT) {
				/*
				 * Shrink root down to allow inline
				 * EA (otherwise fsck complains)
				 */
				p->header.maxentry =
				    cpu_to_le16(XTROOTINITSLOT);
				JFS_IP(ip)->mode2 |= INLINEEA;
			}

			XT_PUTPAGE(mp);	/* debug */
			goto out;
		} else {
			if (log) {	/* COMMIT_PWMAP */
				/* page will be invalidated at tx completion
				 */
				XT_PUTPAGE(mp);
			} else {	/* COMMIT_WMAP */

				if (mp->lid)
					lid_to_tlock(mp->lid)->flag |=
						tlckFREELOCK;

				/* invalidate parent page */
				discard_metapage(mp);
			}

			/* parent has become empty and freed:
			 * go back up to its parent page
			 */
			/* freed = 1; */
			goto getParent;
		}
	}
	/*
	 * parent page still has entries for front region;
	 */
	else {
		/* try truncate region covered by preceding entry
		 * (process backward)
		 */
		index--;

		/* go back down to the child page corresponding
		 * to the entry
		 */
		goto getChild;
	}

	/*
	 *	internal page: go down to child page of current entry
	 */
      getChild:
	/* save current parent entry for the child page */
	if (BT_STACK_FULL(&btstack)) {
		jfs_error(ip->i_sb, "stack overrun in xtTruncate!");
		XT_PUTPAGE(mp);
		return -EIO;
	}
	BT_PUSH(&btstack, bn, index);

	/* get child page */
	xad = &p->xad[index];
	bn = addressXAD(xad);

	/*
	 * first access of each internal entry:
	 */
	/* release parent page */
	XT_PUTPAGE(mp);

	/* process the child page */
	goto getPage;

      out:
	/*
	 * update file resource stat
	 */
	/* set size
	 */
	if (S_ISDIR(ip->i_mode) && !newsize)
		ip->i_size = 1;	/* fsck hates zero-length directories */
	else
		ip->i_size = newsize;

	/* update quota allocation to reflect freed blocks */
	DQUOT_FREE_BLOCK(ip, nfreed);

	/*
	 * free tlock of invalidated pages
	 */
	if (flag == COMMIT_WMAP)
		txFreelock(ip);

	return newsize;
}
3962 | 3952 | ||
3963 | 3953 | ||
3964 | /* | 3954 | /* |
3965 | * xtTruncate_pmap() | 3955 | * xtTruncate_pmap() |
3966 | * | 3956 | * |
3967 | * function: | 3957 | * function: |
3968 | * Perform truncate to zero length for deleted file, leaving the | 3958 | * Perform truncate to zero length for deleted file, leaving the |
3969 | * the xtree and working map untouched. This allows the file to | 3959 | * the xtree and working map untouched. This allows the file to |
3970 | * be accessed via open file handles, while the delete of the file | 3960 | * be accessed via open file handles, while the delete of the file |
3971 | * is committed to disk. | 3961 | * is committed to disk. |
3972 | * | 3962 | * |
3973 | * parameter: | 3963 | * parameter: |
3974 | * tid_t tid, | 3964 | * tid_t tid, |
3975 | * struct inode *ip, | 3965 | * struct inode *ip, |
3976 | * s64 committed_size) | 3966 | * s64 committed_size) |
3977 | * | 3967 | * |
3978 | * return: new committed size | 3968 | * return: new committed size |
3979 | * | 3969 | * |
3980 | * note: | 3970 | * note: |
3981 | * | 3971 | * |
3982 | * To avoid deadlock by holding too many transaction locks, the | 3972 | * To avoid deadlock by holding too many transaction locks, the |
3983 | * truncation may be broken up into multiple transactions. | 3973 | * truncation may be broken up into multiple transactions. |
3984 |  * The committed_size keeps track of which part of the file has | 3974 |  * The committed_size keeps track of which part of the file has |
3985 |  * been freed from the pmaps. | 3975 |  * been freed from the pmaps. |
3986 | */ | 3976 | */ |
3987 | s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) | 3977 | s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) |
3988 | { | 3978 | { |
3989 | s64 bn; | 3979 | s64 bn; |
3990 | struct btstack btstack; | 3980 | struct btstack btstack; |
3991 | int cmp; | 3981 | int cmp; |
3992 | int index; | 3982 | int index; |
3993 | int locked_leaves = 0; | 3983 | int locked_leaves = 0; |
3994 | struct metapage *mp; | 3984 | struct metapage *mp; |
3995 | xtpage_t *p; | 3985 | xtpage_t *p; |
3996 | struct btframe *parent; | 3986 | struct btframe *parent; |
3997 | int rc; | 3987 | int rc; |
3998 | struct tblock *tblk; | 3988 | struct tblock *tblk; |
3999 | struct tlock *tlck = NULL; | 3989 | struct tlock *tlck = NULL; |
4000 | xad_t *xad; | 3990 | xad_t *xad; |
4001 | int xlen; | 3991 | int xlen; |
4002 | s64 xoff; | 3992 | s64 xoff; |
4003 | struct xtlock *xtlck = NULL; | 3993 | struct xtlock *xtlck = NULL; |
4004 | 3994 | ||
4005 | /* save object truncation type */ | 3995 | /* save object truncation type */ |
4006 | tblk = tid_to_tblock(tid); | 3996 | tblk = tid_to_tblock(tid); |
4007 | tblk->xflag |= COMMIT_PMAP; | 3997 | tblk->xflag |= COMMIT_PMAP; |
4008 | 3998 | ||
4009 | /* clear stack */ | 3999 | /* clear stack */ |
4010 | BT_CLR(&btstack); | 4000 | BT_CLR(&btstack); |
4011 | 4001 | ||
4012 | if (committed_size) { | 4002 | if (committed_size) { |
4013 | xoff = (committed_size >> JFS_SBI(ip->i_sb)->l2bsize) - 1; | 4003 | xoff = (committed_size >> JFS_SBI(ip->i_sb)->l2bsize) - 1; |
4014 | rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0); | 4004 | rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0); |
4015 | if (rc) | 4005 | if (rc) |
4016 | return rc; | 4006 | return rc; |
4017 | 4007 | ||
4018 | XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); | 4008 | XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); |
4019 | 4009 | ||
4020 | if (cmp != 0) { | 4010 | if (cmp != 0) { |
4021 | XT_PUTPAGE(mp); | 4011 | XT_PUTPAGE(mp); |
4022 | jfs_error(ip->i_sb, | 4012 | jfs_error(ip->i_sb, |
4023 | "xtTruncate_pmap: did not find extent"); | 4013 | "xtTruncate_pmap: did not find extent"); |
4024 | return -EIO; | 4014 | return -EIO; |
4025 | } | 4015 | } |
4026 | } else { | 4016 | } else { |
4027 | /* | 4017 | /* |
4028 | * start with root | 4018 | * start with root |
4029 | * | 4019 | * |
4030 | * root resides in the inode | 4020 | * root resides in the inode |
4031 | */ | 4021 | */ |
4032 | bn = 0; | 4022 | bn = 0; |
4033 | 4023 | ||
4034 | /* | 4024 | /* |
4035 | * first access of each page: | 4025 | * first access of each page: |
4036 | */ | 4026 | */ |
4037 | getPage: | 4027 | getPage: |
4038 | XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); | 4028 | XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); |
4039 | if (rc) | 4029 | if (rc) |
4040 | return rc; | 4030 | return rc; |
4041 | 4031 | ||
4042 | /* process entries backward from last index */ | 4032 | /* process entries backward from last index */ |
4043 | index = le16_to_cpu(p->header.nextindex) - 1; | 4033 | index = le16_to_cpu(p->header.nextindex) - 1; |
4044 | 4034 | ||
4045 | if (p->header.flag & BT_INTERNAL) | 4035 | if (p->header.flag & BT_INTERNAL) |
4046 | goto getChild; | 4036 | goto getChild; |
4047 | } | 4037 | } |
4048 | 4038 | ||
4049 | /* | 4039 | /* |
4050 | * leaf page | 4040 | * leaf page |
4051 | */ | 4041 | */ |
4052 | 4042 | ||
4053 | if (++locked_leaves > MAX_TRUNCATE_LEAVES) { | 4043 | if (++locked_leaves > MAX_TRUNCATE_LEAVES) { |
4054 | /* | 4044 | /* |
4055 | * We need to limit the size of the transaction | 4045 | * We need to limit the size of the transaction |
4056 | * to avoid exhausting pagecache & tlocks | 4046 | * to avoid exhausting pagecache & tlocks |
4057 | */ | 4047 | */ |
4058 | xad = &p->xad[index]; | 4048 | xad = &p->xad[index]; |
4059 | xoff = offsetXAD(xad); | 4049 | xoff = offsetXAD(xad); |
4060 | xlen = lengthXAD(xad); | 4050 | xlen = lengthXAD(xad); |
4061 | XT_PUTPAGE(mp); | 4051 | XT_PUTPAGE(mp); |
4062 | return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize; | 4052 | return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize; |
4063 | } | 4053 | } |
4064 | tlck = txLock(tid, ip, mp, tlckXTREE); | 4054 | tlck = txLock(tid, ip, mp, tlckXTREE); |
4065 | tlck->type = tlckXTREE | tlckFREE; | 4055 | tlck->type = tlckXTREE | tlckFREE; |
4066 | xtlck = (struct xtlock *) & tlck->lock; | 4056 | xtlck = (struct xtlock *) & tlck->lock; |
4067 | xtlck->hwm.offset = index; | 4057 | xtlck->hwm.offset = index; |
4068 | 4058 | ||
4069 | 4059 | ||
4070 | XT_PUTPAGE(mp); | 4060 | XT_PUTPAGE(mp); |
4071 | 4061 | ||
4072 | /* | 4062 | /* |
4073 | * go back up to the parent page | 4063 | * go back up to the parent page |
4074 | */ | 4064 | */ |
4075 | getParent: | 4065 | getParent: |
4076 | /* pop/restore parent entry for the current child page */ | 4066 | /* pop/restore parent entry for the current child page */ |
4077 | if ((parent = BT_POP(&btstack)) == NULL) | 4067 | if ((parent = BT_POP(&btstack)) == NULL) |
4078 | /* current page must have been root */ | 4068 | /* current page must have been root */ |
4079 | goto out; | 4069 | goto out; |
4080 | 4070 | ||
4081 | /* get back the parent page */ | 4071 | /* get back the parent page */ |
4082 | bn = parent->bn; | 4072 | bn = parent->bn; |
4083 | XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); | 4073 | XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); |
4084 | if (rc) | 4074 | if (rc) |
4085 | return rc; | 4075 | return rc; |
4086 | 4076 | ||
4087 | index = parent->index; | 4077 | index = parent->index; |
4088 | 4078 | ||
4089 | /* | 4079 | /* |
4090 | * parent page become empty: free the page | 4080 | * parent page become empty: free the page |
4091 | */ | 4081 | */ |
4092 | if (index == XTENTRYSTART) { | 4082 | if (index == XTENTRYSTART) { |
4093 | /* txCommit() with tlckFREE: | 4083 | /* txCommit() with tlckFREE: |
4094 | * free child extents covered by parent; | 4084 | * free child extents covered by parent; |
4095 | * invalidate parent if COMMIT_PWMAP; | 4085 | * invalidate parent if COMMIT_PWMAP; |
4096 | */ | 4086 | */ |
4097 | tlck = txLock(tid, ip, mp, tlckXTREE); | 4087 | tlck = txLock(tid, ip, mp, tlckXTREE); |
4098 | xtlck = (struct xtlock *) & tlck->lock; | 4088 | xtlck = (struct xtlock *) & tlck->lock; |
4099 | xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1; | 4089 | xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1; |
4100 | tlck->type = tlckXTREE | tlckFREE; | 4090 | tlck->type = tlckXTREE | tlckFREE; |
4101 | 4091 | ||
4102 | XT_PUTPAGE(mp); | 4092 | XT_PUTPAGE(mp); |
4103 | 4093 | ||
4104 | if (p->header.flag & BT_ROOT) { | 4094 | if (p->header.flag & BT_ROOT) { |
4105 | 4095 | ||
4106 | goto out; | 4096 | goto out; |
4107 | } else { | 4097 | } else { |
4108 | goto getParent; | 4098 | goto getParent; |
4109 | } | 4099 | } |
4110 | } | 4100 | } |
4111 | /* | 4101 | /* |
4112 | * parent page still has entries for front region; | 4102 | * parent page still has entries for front region; |
4113 | */ | 4103 | */ |
4114 | else | 4104 | else |
4115 | index--; | 4105 | index--; |
4116 | /* | 4106 | /* |
4117 | * internal page: go down to child page of current entry | 4107 | * internal page: go down to child page of current entry |
4118 | */ | 4108 | */ |
4119 | getChild: | 4109 | getChild: |
4120 | /* save current parent entry for the child page */ | 4110 | /* save current parent entry for the child page */ |
4121 | if (BT_STACK_FULL(&btstack)) { | 4111 | if (BT_STACK_FULL(&btstack)) { |
4122 | jfs_error(ip->i_sb, "stack overrun in xtTruncate_pmap!"); | 4112 | jfs_error(ip->i_sb, "stack overrun in xtTruncate_pmap!"); |
4123 | XT_PUTPAGE(mp); | 4113 | XT_PUTPAGE(mp); |
4124 | return -EIO; | 4114 | return -EIO; |
4125 | } | 4115 | } |
4126 | BT_PUSH(&btstack, bn, index); | 4116 | BT_PUSH(&btstack, bn, index); |
4127 | 4117 | ||
4128 | /* get child page */ | 4118 | /* get child page */ |
4129 | xad = &p->xad[index]; | 4119 | xad = &p->xad[index]; |
4130 | bn = addressXAD(xad); | 4120 | bn = addressXAD(xad); |
4131 | 4121 | ||
4132 | /* | 4122 | /* |
4133 | * first access of each internal entry: | 4123 | * first access of each internal entry: |
4134 | */ | 4124 | */ |
4135 | /* release parent page */ | 4125 | /* release parent page */ |
4136 | XT_PUTPAGE(mp); | 4126 | XT_PUTPAGE(mp); |
4137 | 4127 | ||
4138 | /* process the child page */ | 4128 | /* process the child page */ |
4139 | goto getPage; | 4129 | goto getPage; |
4140 | 4130 | ||
4141 | out: | 4131 | out: |
4142 | 4132 | ||
4143 | return 0; | 4133 | return 0; |
4144 | } | 4134 | } |
4145 | 4135 | ||
4146 | #ifdef CONFIG_JFS_STATISTICS | 4136 | #ifdef CONFIG_JFS_STATISTICS |
4147 | int jfs_xtstat_read(char *buffer, char **start, off_t offset, int length, | 4137 | int jfs_xtstat_read(char *buffer, char **start, off_t offset, int length, |
4148 | int *eof, void *data) | 4138 | int *eof, void *data) |
4149 | { | 4139 | { |
4150 | int len = 0; | 4140 | int len = 0; |
4151 | off_t begin; | 4141 | off_t begin; |
4152 | 4142 | ||
4153 | len += sprintf(buffer, | 4143 | len += sprintf(buffer, |
4154 | "JFS Xtree statistics\n" | 4144 | "JFS Xtree statistics\n" |
4155 | "====================\n" | 4145 | "====================\n" |
4156 | "searches = %d\n" | 4146 | "searches = %d\n" |
4157 | "fast searches = %d\n" | 4147 | "fast searches = %d\n" |
4158 | "splits = %d\n", | 4148 | "splits = %d\n", |
4159 | xtStat.search, | 4149 | xtStat.search, |
4160 | xtStat.fastSearch, | 4150 | xtStat.fastSearch, |
4161 | xtStat.split); | 4151 | xtStat.split); |
4162 | 4152 | ||
4163 | begin = offset; | 4153 | begin = offset; |
4164 | *start = buffer + begin; | 4154 | *start = buffer + begin; |
4165 | len -= begin; | 4155 | len -= begin; |
4166 | 4156 | ||
4167 | if (len > length) | 4157 | if (len > length) |
4168 | len = length; | 4158 | len = length; |
4169 | else | 4159 | else |
4170 | *eof = 1; | 4160 | *eof = 1; |
4171 | 4161 | ||
4172 | if (len < 0) | 4162 | if (len < 0) |
4173 | len = 0; | 4163 | len = 0; |
4174 | 4164 | ||
4175 | return len; | 4165 | return len; |
4176 | } | 4166 | } |
4177 | #endif | 4167 | #endif |
4178 | 4168 |