Commit 4d0ddb2ce25db2254d468233d942276ecf40bff8

Authored by Tao Ma
Committed by Mark Fasheh
1 parent a4a4891164

ocfs2: Add inode stealing for ocfs2_reserve_new_inode

Inode allocation is modified to look in other nodes' allocators during
extreme out-of-space situations. We retry our own slot when space is freed
back to the global bitmap, or whenever we've allocated more than 1024 inodes
from another slot.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

Showing 6 changed files with 116 additions and 5 deletions (side-by-side diff).

... ... @@ -5150,6 +5150,8 @@
5150 5150 status = ocfs2_flush_truncate_log(osb);
5151 5151 if (status < 0)
5152 5152 mlog_errno(status);
  5153 + else
  5154 + ocfs2_init_inode_steal_slot(osb);
5153 5155  
5154 5156 mlog_exit(status);
5155 5157 }
fs/ocfs2/localalloc.c
... ... @@ -447,6 +447,8 @@
447 447 iput(main_bm_inode);
448 448  
449 449 out:
  450 + if (!status)
  451 + ocfs2_init_inode_steal_slot(osb);
450 452 mlog_exit(status);
451 453 return status;
452 454 }
... ... @@ -424,7 +424,7 @@
424 424 fe->i_fs_generation = cpu_to_le32(osb->fs_generation);
425 425 fe->i_blkno = cpu_to_le64(fe_blkno);
426 426 fe->i_suballoc_bit = cpu_to_le16(suballoc_bit);
427   - fe->i_suballoc_slot = cpu_to_le16(osb->slot_num);
  427 + fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot);
428 428 fe->i_uid = cpu_to_le32(current->fsuid);
429 429 if (dir->i_mode & S_ISGID) {
430 430 fe->i_gid = cpu_to_le32(dir->i_gid);
... ... @@ -208,11 +208,14 @@
208 208 u32 s_feature_incompat;
209 209 u32 s_feature_ro_compat;
210 210  
211   - /* Protects s_next_generaion, osb_flags. Could protect more on
212   - * osb as it's very short lived. */
  211 + /* Protects s_next_generation, osb_flags and s_inode_steal_slot.
  212 + * Could protect more on osb as it's very short lived.
  213 + */
213 214 spinlock_t osb_lock;
214 215 u32 s_next_generation;
215 216 unsigned long osb_flags;
  217 + s16 s_inode_steal_slot;
  218 + atomic_t s_num_inodes_stolen;
216 219  
217 220 unsigned long s_mount_opt;
218 221 unsigned int s_atime_quantum;
... ... @@ -535,6 +538,33 @@
535 538 pages_per_cluster = 1 << (cbits - PAGE_CACHE_SHIFT);
536 539  
537 540 return pages_per_cluster;
  541 +}
  542 +
  543 +static inline void ocfs2_init_inode_steal_slot(struct ocfs2_super *osb)
  544 +{
  545 + spin_lock(&osb->osb_lock);
  546 + osb->s_inode_steal_slot = OCFS2_INVALID_SLOT;
  547 + spin_unlock(&osb->osb_lock);
  548 + atomic_set(&osb->s_num_inodes_stolen, 0);
  549 +}
  550 +
  551 +static inline void ocfs2_set_inode_steal_slot(struct ocfs2_super *osb,
  552 + s16 slot)
  553 +{
  554 + spin_lock(&osb->osb_lock);
  555 + osb->s_inode_steal_slot = slot;
  556 + spin_unlock(&osb->osb_lock);
  557 +}
  558 +
  559 +static inline s16 ocfs2_get_inode_steal_slot(struct ocfs2_super *osb)
  560 +{
  561 + s16 slot;
  562 +
  563 + spin_lock(&osb->osb_lock);
  564 + slot = osb->s_inode_steal_slot;
  565 + spin_unlock(&osb->osb_lock);
  566 +
  567 + return slot;
538 568 }
539 569  
540 570 #define ocfs2_set_bit ext2_set_bit
... ... @@ -49,6 +49,8 @@
49 49 #define NOT_ALLOC_NEW_GROUP 0
50 50 #define ALLOC_NEW_GROUP 1
51 51  
  52 +#define OCFS2_MAX_INODES_TO_STEAL 1024
  53 +
52 54 static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg);
53 55 static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe);
54 56 static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl);
... ... @@ -109,7 +111,7 @@
109 111 u64 *bg_blkno,
110 112 u16 *bg_bit_off);
111 113  
112   -void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
  114 +static void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
113 115 {
114 116 struct inode *inode = ac->ac_inode;
115 117  
116 118  
117 119  
... ... @@ -120,9 +122,17 @@
120 122 mutex_unlock(&inode->i_mutex);
121 123  
122 124 iput(inode);
  125 + ac->ac_inode = NULL;
123 126 }
124   - if (ac->ac_bh)
  127 + if (ac->ac_bh) {
125 128 brelse(ac->ac_bh);
  129 + ac->ac_bh = NULL;
  130 + }
  131 +}
  132 +
  133 +void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
  134 +{
  135 + ocfs2_free_ac_resource(ac);
126 136 kfree(ac);
127 137 }
128 138  
129 139  
... ... @@ -522,10 +532,42 @@
522 532 return status;
523 533 }
524 534  
  535 +static int ocfs2_steal_inode_from_other_nodes(struct ocfs2_super *osb,
  536 + struct ocfs2_alloc_context *ac)
  537 +{
  538 + int i, status = -ENOSPC;
  539 + s16 slot = ocfs2_get_inode_steal_slot(osb);
  540 +
  541 + /* Start to steal inodes from the first slot after ours. */
  542 + if (slot == OCFS2_INVALID_SLOT)
  543 + slot = osb->slot_num + 1;
  544 +
  545 + for (i = 0; i < osb->max_slots; i++, slot++) {
  546 + if (slot == osb->max_slots)
  547 + slot = 0;
  548 +
  549 + if (slot == osb->slot_num)
  550 + continue;
  551 +
  552 + status = ocfs2_reserve_suballoc_bits(osb, ac,
  553 + INODE_ALLOC_SYSTEM_INODE,
  554 + slot, NOT_ALLOC_NEW_GROUP);
  555 + if (status >= 0) {
  556 + ocfs2_set_inode_steal_slot(osb, slot);
  557 + break;
  558 + }
  559 +
  560 + ocfs2_free_ac_resource(ac);
  561 + }
  562 +
  563 + return status;
  564 +}
  565 +
525 566 int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
526 567 struct ocfs2_alloc_context **ac)
527 568 {
528 569 int status;
  570 + s16 slot = ocfs2_get_inode_steal_slot(osb);
529 571  
530 572 *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
531 573 if (!(*ac)) {
532 574  
... ... @@ -539,9 +581,43 @@
539 581  
540 582 (*ac)->ac_group_search = ocfs2_block_group_search;
541 583  
  584 + /*
  585 + * slot is set when we successfully steal inode from other nodes.
  586 + * It is reset in 3 places:
  587 + * 1. when we flush the truncate log
  588 + * 2. when we complete local alloc recovery.
  589 + * 3. when we successfully allocate from our own slot.
  590 + * After it is set, we will go on stealing inodes until we find the
  591 + * need to check our slots to see whether there is some space for us.
  592 + */
  593 + if (slot != OCFS2_INVALID_SLOT &&
  594 + atomic_read(&osb->s_num_inodes_stolen) < OCFS2_MAX_INODES_TO_STEAL)
  595 + goto inode_steal;
  596 +
  597 + atomic_set(&osb->s_num_inodes_stolen, 0);
542 598 status = ocfs2_reserve_suballoc_bits(osb, *ac,
543 599 INODE_ALLOC_SYSTEM_INODE,
544 600 osb->slot_num, ALLOC_NEW_GROUP);
  601 + if (status >= 0) {
  602 + status = 0;
  603 +
  604 + /*
  605 + * Some inodes must be freed by us, so try to allocate
  606 + * from our own next time.
  607 + */
  608 + if (slot != OCFS2_INVALID_SLOT)
  609 + ocfs2_init_inode_steal_slot(osb);
  610 + goto bail;
  611 + } else if (status < 0 && status != -ENOSPC) {
  612 + mlog_errno(status);
  613 + goto bail;
  614 + }
  615 +
  616 + ocfs2_free_ac_resource(*ac);
  617 +
  618 +inode_steal:
  619 + status = ocfs2_steal_inode_from_other_nodes(osb, *ac);
  620 + atomic_inc(&osb->s_num_inodes_stolen);
545 621 if (status < 0) {
546 622 if (status != -ENOSPC)
547 623 mlog_errno(status);
... ... @@ -1394,6 +1394,7 @@
1394 1394 INIT_LIST_HEAD(&osb->blocked_lock_list);
1395 1395 osb->blocked_lock_count = 0;
1396 1396 spin_lock_init(&osb->osb_lock);
  1397 + ocfs2_init_inode_steal_slot(osb);
1397 1398  
1398 1399 atomic_set(&osb->alloc_stats.moves, 0);
1399 1400 atomic_set(&osb->alloc_stats.local_data, 0);