Commit 847cc6371ba820763773e993000410d6d8d23515

Authored by Andi Kleen
Committed by root
1 parent ba253fbf6d

direct-io: merge direct_io_worker into __blockdev_direct_IO

This doesn't change anything for the compiler, but hch thought it would
make the code clearer.

I moved the reference counting into its own small inline helper, drop_refcount().
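
The new helper, drop_refcount(), is visible at the top of the diff below: it takes dio->bio_lock, decrements dio->refcount, and hands the remaining count back so the caller can decide whether it owns completion. A minimal userspace sketch of the same drop-under-lock pattern (illustration only: struct op and its fields are hypothetical, and a pthread mutex stands in for the kernel spinlock):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-in for struct dio: a lock-protected refcount. */
struct op {
	pthread_mutex_t lock;	/* plays the role of dio->bio_lock */
	int refcount;		/* plays the role of dio->refcount */
};

/* Drop one reference and report how many remain, as drop_refcount()
 * does in the diff; only the caller that sees 0 may complete and free. */
static int drop_refcount(struct op *op)
{
	int remaining;

	pthread_mutex_lock(&op->lock);
	remaining = --op->refcount;
	pthread_mutex_unlock(&op->lock);
	return remaining;
}

int main(void)
{
	struct op *op = malloc(sizeof(*op));

	pthread_mutex_init(&op->lock, NULL);
	op->refcount = 2;	/* submitter + one in-flight completion */

	/* Completion path: not the last reference, so just drop it. */
	if (drop_refcount(op) == 0)
		printf("completion path owned the final put\n");

	/* Submission path: the final put, mirroring the
	 * dio_complete()/kmem_cache_free() branch in the diff. */
	if (drop_refcount(op) == 0) {
		printf("submitter dropped the last ref, completing\n");
		free(op);
	}
	return 0;
}

(Build with cc -pthread; in the real code, the race between the submission and completion paths is exactly why the count is only touched under the lock.)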

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
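
For context, filesystems normally reach __blockdev_direct_IO() through the blockdev_direct_IO() wrapper from their ->direct_IO address_space operation. A sketch of a typical caller of this era (kernel-context only, so it compiles in-tree rather than standalone; the myfs_* names are hypothetical, and the wrapper's forwarding with DIO_LOCKING | DIO_SKIP_HOLES is assumed from 3.x-era <linux/fs.h>):

#include <linux/fs.h>
#include <linux/aio.h>

/* Hypothetical get_block_t callback supplied by the filesystem. */
static int myfs_get_block(struct inode *inode, sector_t iblock,
			  struct buffer_head *bh_result, int create);

static ssize_t
myfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
	       loff_t offset, unsigned long nr_segs)
{
	struct inode *inode = iocb->ki_filp->f_mapping->host;

	/* blockdev_direct_IO() forwards to __blockdev_direct_IO(), the
	 * function this commit folds direct_io_worker() into. */
	return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
				  myfs_get_block);
}

After this commit, everything the old worker did on that path (iovec walking, page accounting, block zeroing, bio submission, and the final refcount drop) happens inline in __blockdev_direct_IO().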

Showing 1 changed file with 132 additions and 139 deletions

... ... @@ -1043,138 +1043,12 @@
1043 1043 return ret;
1044 1044 }
1045 1045  
1046   -static inline ssize_t
1047   -direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1048   - const struct iovec *iov, loff_t offset, unsigned long nr_segs,
1049   - unsigned blkbits, get_block_t get_block, dio_iodone_t end_io,
1050   - dio_submit_t submit_io, struct dio *dio, struct dio_submit *sdio)
  1046 +static inline int drop_refcount(struct dio *dio)
1051 1047 {
1052   - unsigned long user_addr;
  1048 + int ret2;
1053 1049 unsigned long flags;
1054   - int seg;
1055   - ssize_t ret = 0;
1056   - ssize_t ret2;
1057   - size_t bytes;
1058   - struct buffer_head map_bh = { 0, };
1059 1050  
1060   - dio->inode = inode;
1061   - dio->rw = rw;
1062   - sdio->blkbits = blkbits;
1063   - sdio->blkfactor = inode->i_blkbits - blkbits;
1064   - sdio->block_in_file = offset >> blkbits;
1065   -
1066   - sdio->get_block = get_block;
1067   - dio->end_io = end_io;
1068   - sdio->submit_io = submit_io;
1069   - sdio->final_block_in_bio = -1;
1070   - sdio->next_block_for_io = -1;
1071   -
1072   - dio->iocb = iocb;
1073   - dio->i_size = i_size_read(inode);
1074   -
1075   - spin_lock_init(&dio->bio_lock);
1076   - dio->refcount = 1;
1077   -
1078 1051 /*
1079   - * In case of non-aligned buffers, we may need 2 more
1080   - * pages since we need to zero out first and last block.
1081   - */
1082   - if (unlikely(sdio->blkfactor))
1083   - sdio->pages_in_io = 2;
1084   -
1085   - for (seg = 0; seg < nr_segs; seg++) {
1086   - user_addr = (unsigned long)iov[seg].iov_base;
1087   - sdio->pages_in_io +=
1088   - ((user_addr+iov[seg].iov_len +PAGE_SIZE-1)/PAGE_SIZE
1089   - - user_addr/PAGE_SIZE);
1090   - }
1091   -
1092   - for (seg = 0; seg < nr_segs; seg++) {
1093   - user_addr = (unsigned long)iov[seg].iov_base;
1094   - sdio->size += bytes = iov[seg].iov_len;
1095   -
1096   - /* Index into the first page of the first block */
1097   - sdio->first_block_in_page = (user_addr & ~PAGE_MASK) >> blkbits;
1098   - sdio->final_block_in_request = sdio->block_in_file +
1099   - (bytes >> blkbits);
1100   - /* Page fetching state */
1101   - sdio->head = 0;
1102   - sdio->tail = 0;
1103   - sdio->curr_page = 0;
1104   -
1105   - sdio->total_pages = 0;
1106   - if (user_addr & (PAGE_SIZE-1)) {
1107   - sdio->total_pages++;
1108   - bytes -= PAGE_SIZE - (user_addr & (PAGE_SIZE - 1));
1109   - }
1110   - sdio->total_pages += (bytes + PAGE_SIZE - 1) / PAGE_SIZE;
1111   - sdio->curr_user_address = user_addr;
1112   -
1113   - ret = do_direct_IO(dio, sdio, &map_bh);
1114   -
1115   - dio->result += iov[seg].iov_len -
1116   - ((sdio->final_block_in_request - sdio->block_in_file) <<
1117   - blkbits);
1118   -
1119   - if (ret) {
1120   - dio_cleanup(dio, sdio);
1121   - break;
1122   - }
1123   - } /* end iovec loop */
1124   -
1125   - if (ret == -ENOTBLK) {
1126   - /*
1127   - * The remaining part of the request will be
1128   - * be handled by buffered I/O when we return
1129   - */
1130   - ret = 0;
1131   - }
1132   - /*
1133   - * There may be some unwritten disk at the end of a part-written
1134   - * fs-block-sized block. Go zero that now.
1135   - */
1136   - dio_zero_block(dio, sdio, 1, &map_bh);
1137   -
1138   - if (sdio->cur_page) {
1139   - ret2 = dio_send_cur_page(dio, sdio, &map_bh);
1140   - if (ret == 0)
1141   - ret = ret2;
1142   - page_cache_release(sdio->cur_page);
1143   - sdio->cur_page = NULL;
1144   - }
1145   - if (sdio->bio)
1146   - dio_bio_submit(dio, sdio);
1147   -
1148   - /*
1149   - * It is possible that, we return short IO due to end of file.
1150   - * In that case, we need to release all the pages we got hold on.
1151   - */
1152   - dio_cleanup(dio, sdio);
1153   -
1154   - /*
1155   - * All block lookups have been performed. For READ requests
1156   - * we can let i_mutex go now that its achieved its purpose
1157   - * of protecting us from looking up uninitialized blocks.
1158   - */
1159   - if (rw == READ && (dio->flags & DIO_LOCKING))
1160   - mutex_unlock(&dio->inode->i_mutex);
1161   -
1162   - /*
1163   - * The only time we want to leave bios in flight is when a successful
1164   - * partial aio read or full aio write have been setup. In that case
1165   - * bio completion will call aio_complete. The only time it's safe to
1166   - * call aio_complete is when we return -EIOCBQUEUED, so we key on that.
1167   - * This had *better* be the only place that raises -EIOCBQUEUED.
1168   - */
1169   - BUG_ON(ret == -EIOCBQUEUED);
1170   - if (dio->is_async && ret == 0 && dio->result &&
1171   - ((rw & READ) || (dio->result == sdio->size)))
1172   - ret = -EIOCBQUEUED;
1173   -
1174   - if (ret != -EIOCBQUEUED)
1175   - dio_await_completion(dio);
1176   -
1177   - /*
1178 1052 * Sync will always be dropping the final ref and completing the
1179 1053 * operation. AIO can if it was a broken operation described above or
1180 1054 * in fact if all the bios race to complete before we get here. In
... ... @@ -1188,14 +1062,7 @@
1188 1062 spin_lock_irqsave(&dio->bio_lock, flags);
1189 1063 ret2 = --dio->refcount;
1190 1064 spin_unlock_irqrestore(&dio->bio_lock, flags);
1191   -
1192   - if (ret2 == 0) {
1193   - ret = dio_complete(dio, offset, ret, false);
1194   - kmem_cache_free(dio_cache, dio);
1195   - } else
1196   - BUG_ON(ret != -EIOCBQUEUED);
1197   -
1198   - return ret;
  1065 + return ret2;
1199 1066 }
1200 1067  
1201 1068 /*
... ... @@ -1239,6 +1106,9 @@
1239 1106 loff_t end = offset;
1240 1107 struct dio *dio;
1241 1108 struct dio_submit sdio = { 0, };
  1109 + unsigned long user_addr;
  1110 + size_t bytes;
  1111 + struct buffer_head map_bh = { 0, };
1242 1112  
1243 1113 if (rw & WRITE)
1244 1114 rw = WRITE_ODIRECT;
... ... @@ -1316,9 +1186,132 @@
1316 1186 dio->is_async = !is_sync_kiocb(iocb) && !((rw & WRITE) &&
1317 1187 (end > i_size_read(inode)));
1318 1188  
1319   - retval = direct_io_worker(rw, iocb, inode, iov, offset,
1320   - nr_segs, blkbits, get_block, end_io,
1321   - submit_io, dio, &sdio);
  1189 + retval = 0;
  1190 +
  1191 + dio->inode = inode;
  1192 + dio->rw = rw;
  1193 + sdio.blkbits = blkbits;
  1194 + sdio.blkfactor = inode->i_blkbits - blkbits;
  1195 + sdio.block_in_file = offset >> blkbits;
  1196 +
  1197 + sdio.get_block = get_block;
  1198 + dio->end_io = end_io;
  1199 + sdio.submit_io = submit_io;
  1200 + sdio.final_block_in_bio = -1;
  1201 + sdio.next_block_for_io = -1;
  1202 +
  1203 + dio->iocb = iocb;
  1204 + dio->i_size = i_size_read(inode);
  1205 +
  1206 + spin_lock_init(&dio->bio_lock);
  1207 + dio->refcount = 1;
  1208 +
  1209 + /*
  1210 + * In case of non-aligned buffers, we may need 2 more
  1211 + * pages since we need to zero out first and last block.
  1212 + */
  1213 + if (unlikely(sdio.blkfactor))
  1214 + sdio.pages_in_io = 2;
  1215 +
  1216 + for (seg = 0; seg < nr_segs; seg++) {
  1217 + user_addr = (unsigned long)iov[seg].iov_base;
  1218 + sdio.pages_in_io +=
  1219 + ((user_addr + iov[seg].iov_len + PAGE_SIZE-1) /
  1220 + PAGE_SIZE - user_addr / PAGE_SIZE);
  1221 + }
  1222 +
  1223 + for (seg = 0; seg < nr_segs; seg++) {
  1224 + user_addr = (unsigned long)iov[seg].iov_base;
  1225 + sdio.size += bytes = iov[seg].iov_len;
  1226 +
  1227 + /* Index into the first page of the first block */
  1228 + sdio.first_block_in_page = (user_addr & ~PAGE_MASK) >> blkbits;
  1229 + sdio.final_block_in_request = sdio.block_in_file +
  1230 + (bytes >> blkbits);
  1231 + /* Page fetching state */
  1232 + sdio.head = 0;
  1233 + sdio.tail = 0;
  1234 + sdio.curr_page = 0;
  1235 +
  1236 + sdio.total_pages = 0;
  1237 + if (user_addr & (PAGE_SIZE-1)) {
  1238 + sdio.total_pages++;
  1239 + bytes -= PAGE_SIZE - (user_addr & (PAGE_SIZE - 1));
  1240 + }
  1241 + sdio.total_pages += (bytes + PAGE_SIZE - 1) / PAGE_SIZE;
  1242 + sdio.curr_user_address = user_addr;
  1243 +
  1244 + retval = do_direct_IO(dio, &sdio, &map_bh);
  1245 +
  1246 + dio->result += iov[seg].iov_len -
  1247 + ((sdio.final_block_in_request - sdio.block_in_file) <<
  1248 + blkbits);
  1249 +
  1250 + if (retval) {
  1251 + dio_cleanup(dio, &sdio);
  1252 + break;
  1253 + }
  1254 + } /* end iovec loop */
  1255 +
  1256 + if (retval == -ENOTBLK) {
  1257 + /*
  1258 + * The remaining part of the request will be
  1259 + * handled by buffered I/O when we return
  1260 + */
  1261 + retval = 0;
  1262 + }
  1263 + /*
  1264 + * There may be some unwritten disk at the end of a part-written
  1265 + * fs-block-sized block. Go zero that now.
  1266 + */
  1267 + dio_zero_block(dio, &sdio, 1, &map_bh);
  1268 +
  1269 + if (sdio.cur_page) {
  1270 + ssize_t ret2;
  1271 +
  1272 + ret2 = dio_send_cur_page(dio, &sdio, &map_bh);
  1273 + if (retval == 0)
  1274 + retval = ret2;
  1275 + page_cache_release(sdio.cur_page);
  1276 + sdio.cur_page = NULL;
  1277 + }
  1278 + if (sdio.bio)
  1279 + dio_bio_submit(dio, &sdio);
  1280 +
  1281 + /*
  1282 + * It is possible that we return a short IO due to end of file.
  1283 + * In that case, we need to release all the pages we got hold of.
  1284 + */
  1285 + dio_cleanup(dio, &sdio);
  1286 +
  1287 + /*
  1288 + * All block lookups have been performed. For READ requests
  1289 + * we can let i_mutex go now that it's achieved its purpose
  1290 + * of protecting us from looking up uninitialized blocks.
  1291 + */
  1292 + if (rw == READ && (dio->flags & DIO_LOCKING))
  1293 + mutex_unlock(&dio->inode->i_mutex);
  1294 +
  1295 + /*
  1296 + * The only time we want to leave bios in flight is when a successful
  1297 + * partial aio read or full aio write has been set up. In that case
  1298 + * bio completion will call aio_complete. The only time it's safe to
  1299 + * call aio_complete is when we return -EIOCBQUEUED, so we key on that.
  1300 + * This had *better* be the only place that raises -EIOCBQUEUED.
  1301 + */
  1302 + BUG_ON(retval == -EIOCBQUEUED);
  1303 + if (dio->is_async && retval == 0 && dio->result &&
  1304 + ((rw & READ) || (dio->result == sdio.size)))
  1305 + retval = -EIOCBQUEUED;
  1306 +
  1307 + if (retval != -EIOCBQUEUED)
  1308 + dio_await_completion(dio);
  1309 +
  1310 + if (drop_refcount(dio) == 0) {
  1311 + retval = dio_complete(dio, offset, retval, false);
  1312 + kmem_cache_free(dio_cache, dio);
  1313 + } else
  1314 + BUG_ON(retval != -EIOCBQUEUED);
1322 1315  
1323 1316 out:
1324 1317 return retval;
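
To make the pages_in_io accounting in the first iovec loop above concrete: each segment is charged (end rounded up to a page boundary) minus (start rounded down), so buffers that straddle page boundaries cost extra pages. A small standalone illustration of that arithmetic (not kernel code; a 4 KiB PAGE_SIZE is assumed):

#include <stdio.h>

#define PAGE_SIZE 4096UL

/* Pages touched by a user buffer [addr, addr + len), using the same
 * expression as the pages_in_io loop: one past the last page index,
 * minus the first page index. */
static unsigned long pages_spanned(unsigned long addr, unsigned long len)
{
	return (addr + len + PAGE_SIZE - 1) / PAGE_SIZE - addr / PAGE_SIZE;
}

int main(void)
{
	/* An 8 KiB buffer starting 16 bytes before a page boundary:
	 * the misaligned head and tail make it straddle 3 pages, so a
	 * naive len / PAGE_SIZE == 2 would undercount. */
	unsigned long addr = 0x1ff0, len = 8192;

	printf("pages spanned: %lu\n", pages_spanned(addr, len)); /* 3 */
	return 0;
}

The second loop's total_pages computation reaches the same answer segment by segment (here 1 page for the misaligned head plus 2 more for the remaining 8176 bytes), which is the page budget do_direct_IO() then works through.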