Commit 7c9ca621137cde26be05448133fc1a554345f4f8

Authored by Bob Peterson
Committed by Steven Whitehouse
1 parent 9453615a1a

GFS2: Use rbtree for resource groups and clean up bitmap buffer ref count scheme

Here is an update of Bob's original rbtree patch which, in addition, also
resolves the rather strange ref counting that was being done relating to
the bitmap blocks.

Originally we had a dual system for journaling resource groups. The metadata
blocks were journaled and also the rgrp itself was added to a list. The reason
for adding the rgrp to the list in the journal was so that the "repolish
clones" code could be run to update the free space, and potentially send any
discard requests when the log was flushed. This was done by comparing the
"cloned" bitmap with what had been written back on disk during the transaction
commit.

Due to this, there was a requirement to hang on to the rgrps' bitmap buffers
until the journal had been flushed. For that reason, there was a rather
complicated set up in the ->go_lock ->go_unlock functions for rgrps involving
both a mutex and a spinlock (the ->sd_rindex_spin) to maintain a reference
count on the buffers.

However, the journal maintains a reference count on the buffers anyway, since
they are being journaled as metadata buffers. So by moving the code which deals
with the post-journal accounting for bitmap blocks to the metadata journaling
code, we can entirely dispense with the rather strange buffer ref counting
scheme and also the requirement to journal the rgrps.

The net result of all this is that the ->sd_rindex_spin is left to do exactly
one job, and that is to look after the rbtree of rgrps.

This patch is designed to be a stepping stone towards using RCU for the rbtree
of resource groups, however the reduction in the number of uses of the
->sd_rindex_spin is likely to have benefits for multi-threaded workloads,
anyway.

The patch retains ->go_lock and ->go_unlock for rgrps, however these may also
be removed in future in favour of calling the functions directly where required
in the code. That will allow locking of resource groups without needing to
actually read them in - something that could be useful in speeding up statfs.

In the meantime, though, it is valid to dereference ->bi_bh only when the rgrp
is locked. This is basically the same rule as before, modulo the references not
being valid until the following journal flush.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Cc: Benjamin Marzinski <bmarzins@redhat.com>

Showing 8 changed files with 148 additions and 345 deletions Side-by-side Diff

... ... @@ -134,6 +134,8 @@
134 134 static void rgrp_go_sync(struct gfs2_glock *gl)
135 135 {
136 136 struct address_space *metamapping = gfs2_glock2aspace(gl);
  137 + struct gfs2_rgrpd *rgd = gl->gl_object;
  138 + unsigned int x;
137 139 int error;
138 140  
139 141 if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags))
... ... @@ -145,6 +147,15 @@
145 147 error = filemap_fdatawait(metamapping);
146 148 mapping_set_error(metamapping, error);
147 149 gfs2_ail_empty_gl(gl);
  150 +
  151 + if (!rgd)
  152 + return;
  153 +
  154 + for (x = 0; x < rgd->rd_length; x++) {
  155 + struct gfs2_bitmap *bi = rgd->rd_bits + x;
  156 + kfree(bi->bi_clone);
  157 + bi->bi_clone = NULL;
  158 + }
148 159 }
149 160  
150 161 /**
... ... @@ -445,33 +456,6 @@
445 456 }
446 457  
447 458 /**
448   - * rgrp_go_lock - operation done after an rgrp lock is locked by
449   - * a first holder on this node.
450   - * @gl: the glock
451   - * @flags:
452   - *
453   - * Returns: errno
454   - */
455   -
456   -static int rgrp_go_lock(struct gfs2_holder *gh)
457   -{
458   - return gfs2_rgrp_bh_get(gh->gh_gl->gl_object);
459   -}
460   -
461   -/**
462   - * rgrp_go_unlock - operation done before an rgrp lock is unlocked by
463   - * a last holder on this node.
464   - * @gl: the glock
465   - * @flags:
466   - *
467   - */
468   -
469   -static void rgrp_go_unlock(struct gfs2_holder *gh)
470   -{
471   - gfs2_rgrp_bh_put(gh->gh_gl->gl_object);
472   -}
473   -
474   -/**
475 459 * trans_go_sync - promote/demote the transaction glock
476 460 * @gl: the glock
477 461 * @state: the requested state
... ... @@ -573,8 +557,8 @@
573 557 const struct gfs2_glock_operations gfs2_rgrp_glops = {
574 558 .go_xmote_th = rgrp_go_sync,
575 559 .go_inval = rgrp_go_inval,
576   - .go_lock = rgrp_go_lock,
577   - .go_unlock = rgrp_go_unlock,
  560 + .go_lock = gfs2_rgrp_go_lock,
  561 + .go_unlock = gfs2_rgrp_go_unlock,
578 562 .go_dump = gfs2_rgrp_dump,
579 563 .go_type = LM_TYPE_RGRP,
580 564 .go_flags = GLOF_ASPACE,
... ... @@ -18,6 +18,7 @@
18 18 #include <linux/rcupdate.h>
19 19 #include <linux/rculist_bl.h>
20 20 #include <linux/completion.h>
  21 +#include <linux/rbtree.h>
21 22  
22 23 #define DIO_WAIT 0x00000010
23 24 #define DIO_METADATA 0x00000020
... ... @@ -78,8 +79,7 @@
78 79 };
79 80  
80 81 struct gfs2_rgrpd {
81   - struct list_head rd_list; /* Link with superblock */
82   - struct list_head rd_list_mru;
  82 + struct rb_node rd_node; /* Link with superblock */
83 83 struct gfs2_glock *rd_gl; /* Glock for this rgrp */
84 84 u64 rd_addr; /* grp block disk address */
85 85 u64 rd_data0; /* first data location */
86 86  
... ... @@ -91,10 +91,7 @@
91 91 u32 rd_dinodes;
92 92 u64 rd_igeneration;
93 93 struct gfs2_bitmap *rd_bits;
94   - struct mutex rd_mutex;
95   - struct gfs2_log_element rd_le;
96 94 struct gfs2_sbd *rd_sbd;
97   - unsigned int rd_bh_count;
98 95 u32 rd_last_alloc;
99 96 u32 rd_flags;
100 97 #define GFS2_RDF_CHECK 0x10000000 /* check for unlinked inodes */
... ... @@ -575,9 +572,7 @@
575 572 int sd_rindex_uptodate;
576 573 spinlock_t sd_rindex_spin;
577 574 struct mutex sd_rindex_mutex;
578   - struct list_head sd_rindex_list;
579   - struct list_head sd_rindex_mru_list;
580   - struct gfs2_rgrpd *sd_rindex_forward;
  575 + struct rb_root sd_rindex_tree;
581 576 unsigned int sd_rgrps;
582 577 unsigned int sd_max_rg_data;
583 578  
... ... @@ -60,6 +60,29 @@
60 60 trace_gfs2_pin(bd, 1);
61 61 }
62 62  
  63 +static bool buffer_is_rgrp(const struct gfs2_bufdata *bd)
  64 +{
  65 + return bd->bd_gl->gl_name.ln_type == LM_TYPE_RGRP;
  66 +}
  67 +
  68 +static void maybe_release_space(struct gfs2_bufdata *bd)
  69 +{
  70 + struct gfs2_glock *gl = bd->bd_gl;
  71 + struct gfs2_sbd *sdp = gl->gl_sbd;
  72 + struct gfs2_rgrpd *rgd = gl->gl_object;
  73 + unsigned int index = bd->bd_bh->b_blocknr - gl->gl_name.ln_number;
  74 + struct gfs2_bitmap *bi = rgd->rd_bits + index;
  75 +
  76 + if (bi->bi_clone == 0)
  77 + return;
  78 + if (sdp->sd_args.ar_discard)
  79 + gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi);
  80 + memcpy(bi->bi_clone + bi->bi_offset,
  81 + bd->bd_bh->b_data + bi->bi_offset, bi->bi_len);
  82 + clear_bit(GBF_FULL, &bi->bi_flags);
  83 + rgd->rd_free_clone = rgd->rd_free;
  84 +}
  85 +
63 86 /**
64 87 * gfs2_unpin - Unpin a buffer
65 88 * @sdp: the filesystem the buffer belongs to
... ... @@ -81,6 +104,9 @@
81 104 mark_buffer_dirty(bh);
82 105 clear_buffer_pinned(bh);
83 106  
  107 + if (buffer_is_rgrp(bd))
  108 + maybe_release_space(bd);
  109 +
84 110 spin_lock(&sdp->sd_ail_lock);
85 111 if (bd->bd_ail) {
86 112 list_del(&bd->bd_ail_st_list);
... ... @@ -469,42 +495,6 @@
469 495 gfs2_revoke_clean(sdp);
470 496 }
471 497  
472   -static void rg_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
473   -{
474   - struct gfs2_rgrpd *rgd;
475   - struct gfs2_trans *tr = current->journal_info;
476   -
477   - tr->tr_touched = 1;
478   -
479   - rgd = container_of(le, struct gfs2_rgrpd, rd_le);
480   -
481   - gfs2_log_lock(sdp);
482   - if (!list_empty(&le->le_list)){
483   - gfs2_log_unlock(sdp);
484   - return;
485   - }
486   - gfs2_rgrp_bh_hold(rgd);
487   - sdp->sd_log_num_rg++;
488   - list_add(&le->le_list, &sdp->sd_log_le_rg);
489   - gfs2_log_unlock(sdp);
490   -}
491   -
492   -static void rg_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
493   -{
494   - struct list_head *head = &sdp->sd_log_le_rg;
495   - struct gfs2_rgrpd *rgd;
496   -
497   - while (!list_empty(head)) {
498   - rgd = list_entry(head->next, struct gfs2_rgrpd, rd_le.le_list);
499   - list_del_init(&rgd->rd_le.le_list);
500   - sdp->sd_log_num_rg--;
501   -
502   - gfs2_rgrp_repolish_clones(rgd);
503   - gfs2_rgrp_bh_put(rgd);
504   - }
505   - gfs2_assert_warn(sdp, !sdp->sd_log_num_rg);
506   -}
507   -
508 498 /**
509 499 * databuf_lo_add - Add a databuf to the transaction.
510 500 *
... ... @@ -771,8 +761,6 @@
771 761 };
772 762  
773 763 const struct gfs2_log_operations gfs2_rg_lops = {
774   - .lo_add = rg_lo_add,
775   - .lo_after_commit = rg_lo_after_commit,
776 764 .lo_name = "rg",
777 765 };
778 766  
fs/gfs2/ops_fstype.c
... ... @@ -77,8 +77,7 @@
77 77  
78 78 spin_lock_init(&sdp->sd_rindex_spin);
79 79 mutex_init(&sdp->sd_rindex_mutex);
80   - INIT_LIST_HEAD(&sdp->sd_rindex_list);
81   - INIT_LIST_HEAD(&sdp->sd_rindex_mru_list);
  80 + sdp->sd_rindex_tree.rb_node = NULL;
82 81  
83 82 INIT_LIST_HEAD(&sdp->sd_jindex_list);
84 83 spin_lock_init(&sdp->sd_jindex_spin);
... ... @@ -15,6 +15,7 @@
15 15 #include <linux/gfs2_ondisk.h>
16 16 #include <linux/prefetch.h>
17 17 #include <linux/blkdev.h>
  18 +#include <linux/rbtree.h>
18 19  
19 20 #include "gfs2.h"
20 21 #include "incore.h"
21 22  
22 23  
... ... @@ -328,15 +329,25 @@
328 329  
329 330 struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk)
330 331 {
331   - struct gfs2_rgrpd *rgd;
  332 + struct rb_node **newn, *parent = NULL;
332 333  
333 334 spin_lock(&sdp->sd_rindex_spin);
334 335  
335   - list_for_each_entry(rgd, &sdp->sd_rindex_mru_list, rd_list_mru) {
336   - if (rgrp_contains_block(rgd, blk)) {
337   - list_move(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
  336 + newn = &sdp->sd_rindex_tree.rb_node;
  337 +
  338 + /* Figure out where to put new node */
  339 + while (*newn) {
  340 + struct gfs2_rgrpd *cur = rb_entry(*newn, struct gfs2_rgrpd,
  341 + rd_node);
  342 +
  343 + parent = *newn;
  344 + if (blk < cur->rd_addr)
  345 + newn = &((*newn)->rb_left);
  346 + else if (blk > cur->rd_data0 + cur->rd_data)
  347 + newn = &((*newn)->rb_right);
  348 + else {
338 349 spin_unlock(&sdp->sd_rindex_spin);
339   - return rgd;
  350 + return cur;
340 351 }
341 352 }
342 353  
... ... @@ -354,8 +365,13 @@
354 365  
355 366 struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp)
356 367 {
357   - gfs2_assert(sdp, !list_empty(&sdp->sd_rindex_list));
358   - return list_entry(sdp->sd_rindex_list.next, struct gfs2_rgrpd, rd_list);
  368 + const struct rb_node *n;
  369 + struct gfs2_rgrpd *rgd;
  370 +
  371 + n = rb_first(&sdp->sd_rindex_tree);
  372 + rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
  373 +
  374 + return rgd;
359 375 }
360 376  
361 377 /**
362 378  
363 379  
364 380  
365 381  
... ... @@ -367,28 +383,34 @@
367 383  
368 384 struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd)
369 385 {
370   - if (rgd->rd_list.next == &rgd->rd_sbd->sd_rindex_list)
  386 + struct gfs2_sbd *sdp = rgd->rd_sbd;
  387 + const struct rb_node *n;
  388 +
  389 + spin_lock(&sdp->sd_rindex_spin);
  390 + n = rb_next(&rgd->rd_node);
  391 + if (n == NULL)
  392 + n = rb_first(&sdp->sd_rindex_tree);
  393 +
  394 + if (unlikely(&rgd->rd_node == n)) {
  395 + spin_unlock(&sdp->sd_rindex_spin);
371 396 return NULL;
372   - return list_entry(rgd->rd_list.next, struct gfs2_rgrpd, rd_list);
  397 + }
  398 + rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
  399 + spin_unlock(&sdp->sd_rindex_spin);
  400 + return rgd;
373 401 }
374 402  
375 403 static void clear_rgrpdi(struct gfs2_sbd *sdp)
376 404 {
377   - struct list_head *head;
  405 + struct rb_node *n;
378 406 struct gfs2_rgrpd *rgd;
379 407 struct gfs2_glock *gl;
380 408  
381   - spin_lock(&sdp->sd_rindex_spin);
382   - sdp->sd_rindex_forward = NULL;
383   - spin_unlock(&sdp->sd_rindex_spin);
384   -
385   - head = &sdp->sd_rindex_list;
386   - while (!list_empty(head)) {
387   - rgd = list_entry(head->next, struct gfs2_rgrpd, rd_list);
  409 + while ((n = rb_first(&sdp->sd_rindex_tree))) {
  410 + rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
388 411 gl = rgd->rd_gl;
389 412  
390   - list_del(&rgd->rd_list);
391   - list_del(&rgd->rd_list_mru);
  413 + rb_erase(n, &sdp->sd_rindex_tree);
392 414  
393 415 if (gl) {
394 416 gl->gl_object = NULL;
... ... @@ -535,6 +557,29 @@
535 557 rgd->rd_bitbytes = be32_to_cpu(str->ri_bitbytes);
536 558 }
537 559  
  560 +static void rgd_insert(struct gfs2_rgrpd *rgd)
  561 +{
  562 + struct gfs2_sbd *sdp = rgd->rd_sbd;
  563 + struct rb_node **newn = &sdp->sd_rindex_tree.rb_node, *parent = NULL;
  564 +
  565 + /* Figure out where to put new node */
  566 + while (*newn) {
  567 + struct gfs2_rgrpd *cur = rb_entry(*newn, struct gfs2_rgrpd,
  568 + rd_node);
  569 +
  570 + parent = *newn;
  571 + if (rgd->rd_addr < cur->rd_addr)
  572 + newn = &((*newn)->rb_left);
  573 + else if (rgd->rd_addr > cur->rd_addr)
  574 + newn = &((*newn)->rb_right);
  575 + else
  576 + return;
  577 + }
  578 +
  579 + rb_link_node(&rgd->rd_node, parent, newn);
  580 + rb_insert_color(&rgd->rd_node, &sdp->sd_rindex_tree);
  581 +}
  582 +
538 583 /**
539 584 * read_rindex_entry - Pull in a new resource index entry from the disk
540 585 * @gl: The glock covering the rindex inode
541 586  
542 587  
... ... @@ -566,14 +611,11 @@
566 611 if (!rgd)
567 612 return error;
568 613  
569   - mutex_init(&rgd->rd_mutex);
570   - lops_init_le(&rgd->rd_le, &gfs2_rg_lops);
571 614 rgd->rd_sbd = sdp;
572 615  
573   - list_add_tail(&rgd->rd_list, &sdp->sd_rindex_list);
574   - list_add_tail(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
575   -
576 616 gfs2_rindex_in(rgd, buf);
  617 + rgd_insert(rgd);
  618 +
577 619 error = compute_bitstructs(rgd);
578 620 if (error)
579 621 return error;
... ... @@ -585,6 +627,8 @@
585 627  
586 628 rgd->rd_gl->gl_object = rgd;
587 629 rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
  630 + if (rgd->rd_data > sdp->sd_max_rg_data)
  631 + sdp->sd_max_rg_data = rgd->rd_data;
588 632 return error;
589 633 }
590 634  
... ... @@ -601,8 +645,6 @@
601 645 struct inode *inode = &ip->i_inode;
602 646 struct file_ra_state ra_state;
603 647 u64 rgrp_count = i_size_read(inode);
604   - struct gfs2_rgrpd *rgd;
605   - unsigned int max_data = 0;
606 648 int error;
607 649  
608 650 do_div(rgrp_count, sizeof(struct gfs2_rindex));
... ... @@ -617,10 +659,6 @@
617 659 }
618 660 }
619 661  
620   - list_for_each_entry(rgd, &sdp->sd_rindex_list, rd_list)
621   - if (rgd->rd_data > max_data)
622   - max_data = rgd->rd_data;
623   - sdp->sd_max_rg_data = max_data;
624 662 sdp->sd_rindex_uptodate = 1;
625 663 return 0;
626 664 }
... ... @@ -694,7 +732,7 @@
694 732 }
695 733  
696 734 /**
697   - * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
  735 + * gfs2_rgrp_go_lock - Read in a RG's header and bitmaps
698 736 * @rgd: the struct gfs2_rgrpd describing the RG to read in
699 737 *
700 738 * Read in all of a Resource Group's header and bitmap blocks.
701 739  
... ... @@ -703,8 +741,9 @@
703 741 * Returns: errno
704 742 */
705 743  
706   -int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
  744 +int gfs2_rgrp_go_lock(struct gfs2_holder *gh)
707 745 {
  746 + struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
708 747 struct gfs2_sbd *sdp = rgd->rd_sbd;
709 748 struct gfs2_glock *gl = rgd->rd_gl;
710 749 unsigned int length = rgd->rd_length;
... ... @@ -712,17 +751,6 @@
712 751 unsigned int x, y;
713 752 int error;
714 753  
715   - mutex_lock(&rgd->rd_mutex);
716   -
717   - spin_lock(&sdp->sd_rindex_spin);
718   - if (rgd->rd_bh_count) {
719   - rgd->rd_bh_count++;
720   - spin_unlock(&sdp->sd_rindex_spin);
721   - mutex_unlock(&rgd->rd_mutex);
722   - return 0;
723   - }
724   - spin_unlock(&sdp->sd_rindex_spin);
725   -
726 754 for (x = 0; x < length; x++) {
727 755 bi = rgd->rd_bits + x;
728 756 error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, &bi->bi_bh);
729 757  
... ... @@ -747,15 +775,9 @@
747 775 clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags);
748 776 gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data);
749 777 rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
  778 + rgd->rd_free_clone = rgd->rd_free;
750 779 }
751 780  
752   - spin_lock(&sdp->sd_rindex_spin);
753   - rgd->rd_free_clone = rgd->rd_free;
754   - rgd->rd_bh_count++;
755   - spin_unlock(&sdp->sd_rindex_spin);
756   -
757   - mutex_unlock(&rgd->rd_mutex);
758   -
759 781 return 0;
760 782  
761 783 fail:
762 784  
763 785  
764 786  
765 787  
766 788  
767 789  
768 790  
769 791  
... ... @@ -765,52 +787,32 @@
765 787 bi->bi_bh = NULL;
766 788 gfs2_assert_warn(sdp, !bi->bi_clone);
767 789 }
768   - mutex_unlock(&rgd->rd_mutex);
769 790  
770 791 return error;
771 792 }
772 793  
773   -void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd)
774   -{
775   - struct gfs2_sbd *sdp = rgd->rd_sbd;
776   -
777   - spin_lock(&sdp->sd_rindex_spin);
778   - gfs2_assert_warn(rgd->rd_sbd, rgd->rd_bh_count);
779   - rgd->rd_bh_count++;
780   - spin_unlock(&sdp->sd_rindex_spin);
781   -}
782   -
783 794 /**
784   - * gfs2_rgrp_bh_put - Release RG bitmaps read in with gfs2_rgrp_bh_get()
  795 + * gfs2_rgrp_go_unlock - Release RG bitmaps read in with gfs2_rgrp_bh_get()
785 796 * @rgd: the struct gfs2_rgrpd describing the RG to read in
786 797 *
787 798 */
788 799  
789   -void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd)
  800 +void gfs2_rgrp_go_unlock(struct gfs2_holder *gh)
790 801 {
791   - struct gfs2_sbd *sdp = rgd->rd_sbd;
  802 + struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
792 803 int x, length = rgd->rd_length;
793 804  
794   - spin_lock(&sdp->sd_rindex_spin);
795   - gfs2_assert_warn(rgd->rd_sbd, rgd->rd_bh_count);
796   - if (--rgd->rd_bh_count) {
797   - spin_unlock(&sdp->sd_rindex_spin);
798   - return;
799   - }
800   -
801 805 for (x = 0; x < length; x++) {
802 806 struct gfs2_bitmap *bi = rgd->rd_bits + x;
803   - kfree(bi->bi_clone);
804   - bi->bi_clone = NULL;
805 807 brelse(bi->bi_bh);
806 808 bi->bi_bh = NULL;
807 809 }
808 810  
809   - spin_unlock(&sdp->sd_rindex_spin);
810 811 }
811 812  
812   -static void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
813   - const struct gfs2_bitmap *bi)
  813 +void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
  814 + struct buffer_head *bh,
  815 + const struct gfs2_bitmap *bi)
814 816 {
815 817 struct super_block *sb = sdp->sd_vfs;
816 818 struct block_device *bdev = sb->s_bdev;
... ... @@ -823,7 +825,7 @@
823 825 unsigned int x;
824 826  
825 827 for (x = 0; x < bi->bi_len; x++) {
826   - const u8 *orig = bi->bi_bh->b_data + bi->bi_offset + x;
  828 + const u8 *orig = bh->b_data + bi->bi_offset + x;
827 829 const u8 *clone = bi->bi_clone + bi->bi_offset + x;
828 830 u8 diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1));
829 831 diff &= 0x55;
... ... @@ -862,28 +864,6 @@
862 864 sdp->sd_args.ar_discard = 0;
863 865 }
864 866  
865   -void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd)
866   -{
867   - struct gfs2_sbd *sdp = rgd->rd_sbd;
868   - unsigned int length = rgd->rd_length;
869   - unsigned int x;
870   -
871   - for (x = 0; x < length; x++) {
872   - struct gfs2_bitmap *bi = rgd->rd_bits + x;
873   - if (!bi->bi_clone)
874   - continue;
875   - if (sdp->sd_args.ar_discard)
876   - gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bi);
877   - clear_bit(GBF_FULL, &bi->bi_flags);
878   - memcpy(bi->bi_clone + bi->bi_offset,
879   - bi->bi_bh->b_data + bi->bi_offset, bi->bi_len);
880   - }
881   -
882   - spin_lock(&sdp->sd_rindex_spin);
883   - rgd->rd_free_clone = rgd->rd_free;
884   - spin_unlock(&sdp->sd_rindex_spin);
885   -}
886   -
887 867 /**
888 868 * gfs2_alloc_get - get the struct gfs2_alloc structure for an inode
889 869 * @ip: the incore GFS2 inode structure
890 870  
891 871  
892 872  
893 873  
... ... @@ -911,20 +891,15 @@
911 891  
912 892 static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al)
913 893 {
914   - struct gfs2_sbd *sdp = rgd->rd_sbd;
915   - int ret = 0;
916   -
917 894 if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR))
918 895 return 0;
919 896  
920   - spin_lock(&sdp->sd_rindex_spin);
921 897 if (rgd->rd_free_clone >= al->al_requested) {
922 898 al->al_rgd = rgd;
923   - ret = 1;
  899 + return 1;
924 900 }
925   - spin_unlock(&sdp->sd_rindex_spin);
926 901  
927   - return ret;
  902 + return 0;
928 903 }
929 904  
930 905 /**
... ... @@ -992,76 +967,6 @@
992 967 }
993 968  
994 969 /**
995   - * recent_rgrp_next - get next RG from "recent" list
996   - * @cur_rgd: current rgrp
997   - *
998   - * Returns: The next rgrp in the recent list
999   - */
1000   -
1001   -static struct gfs2_rgrpd *recent_rgrp_next(struct gfs2_rgrpd *cur_rgd)
1002   -{
1003   - struct gfs2_sbd *sdp = cur_rgd->rd_sbd;
1004   - struct list_head *head;
1005   - struct gfs2_rgrpd *rgd;
1006   -
1007   - spin_lock(&sdp->sd_rindex_spin);
1008   - head = &sdp->sd_rindex_mru_list;
1009   - if (unlikely(cur_rgd->rd_list_mru.next == head)) {
1010   - spin_unlock(&sdp->sd_rindex_spin);
1011   - return NULL;
1012   - }
1013   - rgd = list_entry(cur_rgd->rd_list_mru.next, struct gfs2_rgrpd, rd_list_mru);
1014   - spin_unlock(&sdp->sd_rindex_spin);
1015   - return rgd;
1016   -}
1017   -
1018   -/**
1019   - * forward_rgrp_get - get an rgrp to try next from full list
1020   - * @sdp: The GFS2 superblock
1021   - *
1022   - * Returns: The rgrp to try next
1023   - */
1024   -
1025   -static struct gfs2_rgrpd *forward_rgrp_get(struct gfs2_sbd *sdp)
1026   -{
1027   - struct gfs2_rgrpd *rgd;
1028   - unsigned int journals = gfs2_jindex_size(sdp);
1029   - unsigned int rg = 0, x;
1030   -
1031   - spin_lock(&sdp->sd_rindex_spin);
1032   -
1033   - rgd = sdp->sd_rindex_forward;
1034   - if (!rgd) {
1035   - if (sdp->sd_rgrps >= journals)
1036   - rg = sdp->sd_rgrps * sdp->sd_jdesc->jd_jid / journals;
1037   -
1038   - for (x = 0, rgd = gfs2_rgrpd_get_first(sdp); x < rg;
1039   - x++, rgd = gfs2_rgrpd_get_next(rgd))
1040   - /* Do Nothing */;
1041   -
1042   - sdp->sd_rindex_forward = rgd;
1043   - }
1044   -
1045   - spin_unlock(&sdp->sd_rindex_spin);
1046   -
1047   - return rgd;
1048   -}
1049   -
1050   -/**
1051   - * forward_rgrp_set - set the forward rgrp pointer
1052   - * @sdp: the filesystem
1053   - * @rgd: The new forward rgrp
1054   - *
1055   - */
1056   -
1057   -static void forward_rgrp_set(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd)
1058   -{
1059   - spin_lock(&sdp->sd_rindex_spin);
1060   - sdp->sd_rindex_forward = rgd;
1061   - spin_unlock(&sdp->sd_rindex_spin);
1062   -}
1063   -
1064   -/**
1065 970 * get_local_rgrp - Choose and lock a rgrp for allocation
1066 971 * @ip: the inode to reserve space for
1067 972 * @rgp: the chosen and locked rgrp
1068 973  
1069 974  
1070 975  
... ... @@ -1076,14 +981,15 @@
1076 981 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1077 982 struct gfs2_rgrpd *rgd, *begin = NULL;
1078 983 struct gfs2_alloc *al = ip->i_alloc;
1079   - int flags = LM_FLAG_TRY;
1080   - int skipped = 0;
1081   - int loops = 0;
1082 984 int error, rg_locked;
  985 + int loops = 0;
1083 986  
1084   - rgd = gfs2_blk2rgrpd(sdp, ip->i_goal);
  987 + rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal);
1085 988  
1086   - while (rgd) {
  989 + if (rgd == NULL)
  990 + return -EBADSLT;
  991 +
  992 + while (loops < 3) {
1087 993 rg_locked = 0;
1088 994  
1089 995 if (gfs2_glock_is_locked_by_me(rgd->rd_gl)) {
1090 996  
... ... @@ -1096,14 +1002,16 @@
1096 1002 switch (error) {
1097 1003 case 0:
1098 1004 if (try_rgrp_fit(rgd, al))
1099   - goto out;
  1005 + return 0;
1100 1006 if (rgd->rd_flags & GFS2_RDF_CHECK)
1101 1007 try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr);
1102 1008 if (!rg_locked)
1103 1009 gfs2_glock_dq_uninit(&al->al_rgd_gh);
1104 1010 /* fall through */
1105 1011 case GLR_TRYFAILED:
1106   - rgd = recent_rgrp_next(rgd);
  1012 + rgd = gfs2_rgrpd_get_next(rgd);
  1013 + if (rgd == begin)
  1014 + loops++;
1107 1015 break;
1108 1016  
1109 1017 default:
... ... @@ -1111,65 +1019,7 @@
1111 1019 }
1112 1020 }
1113 1021  
1114   - /* Go through full list of rgrps */
1115   -
1116   - begin = rgd = forward_rgrp_get(sdp);
1117   -
1118   - for (;;) {
1119   - rg_locked = 0;
1120   -
1121   - if (gfs2_glock_is_locked_by_me(rgd->rd_gl)) {
1122   - rg_locked = 1;
1123   - error = 0;
1124   - } else {
1125   - error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, flags,
1126   - &al->al_rgd_gh);
1127   - }
1128   - switch (error) {
1129   - case 0:
1130   - if (try_rgrp_fit(rgd, al))
1131   - goto out;
1132   - if (rgd->rd_flags & GFS2_RDF_CHECK)
1133   - try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr);
1134   - if (!rg_locked)
1135   - gfs2_glock_dq_uninit(&al->al_rgd_gh);
1136   - break;
1137   -
1138   - case GLR_TRYFAILED:
1139   - skipped++;
1140   - break;
1141   -
1142   - default:
1143   - return error;
1144   - }
1145   -
1146   - rgd = gfs2_rgrpd_get_next(rgd);
1147   - if (!rgd)
1148   - rgd = gfs2_rgrpd_get_first(sdp);
1149   -
1150   - if (rgd == begin) {
1151   - if (++loops >= 3)
1152   - return -ENOSPC;
1153   - if (!skipped)
1154   - loops++;
1155   - flags = 0;
1156   - if (loops == 2)
1157   - gfs2_log_flush(sdp, NULL);
1158   - }
1159   - }
1160   -
1161   -out:
1162   - if (begin) {
1163   - spin_lock(&sdp->sd_rindex_spin);
1164   - list_move(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
1165   - spin_unlock(&sdp->sd_rindex_spin);
1166   - rgd = gfs2_rgrpd_get_next(rgd);
1167   - if (!rgd)
1168   - rgd = gfs2_rgrpd_get_first(sdp);
1169   - forward_rgrp_set(sdp, rgd);
1170   - }
1171   -
1172   - return 0;
  1022 + return -ENOSPC;
1173 1023 }
1174 1024  
1175 1025 /**
... ... @@ -1352,6 +1202,7 @@
1352 1202 /* The GFS2_BLKST_UNLINKED state doesn't apply to the clone
1353 1203 bitmaps, so we must search the originals for that. */
1354 1204 buffer = bi->bi_bh->b_data + bi->bi_offset;
  1205 + WARN_ON(!buffer_uptodate(bi->bi_bh));
1355 1206 if (old_state != GFS2_BLKST_UNLINKED && bi->bi_clone)
1356 1207 buffer = bi->bi_clone + bi->bi_offset;
1357 1208  
... ... @@ -1371,6 +1222,7 @@
1371 1222  
1372 1223 if (blk == BFITNOENT)
1373 1224 return blk;
  1225 +
1374 1226 *n = 1;
1375 1227 if (old_state == new_state)
1376 1228 goto out;
1377 1229  
... ... @@ -1539,9 +1391,7 @@
1539 1391 gfs2_statfs_change(sdp, 0, -(s64)*n, 0);
1540 1392 gfs2_quota_change(ip, *n, ip->i_inode.i_uid, ip->i_inode.i_gid);
1541 1393  
1542   - spin_lock(&sdp->sd_rindex_spin);
1543 1394 rgd->rd_free_clone -= *n;
1544   - spin_unlock(&sdp->sd_rindex_spin);
1545 1395 trace_gfs2_block_alloc(ip, block, *n, GFS2_BLKST_USED);
1546 1396 *bn = block;
1547 1397 return 0;
1548 1398  
... ... @@ -1594,9 +1444,7 @@
1594 1444 gfs2_statfs_change(sdp, 0, -1, +1);
1595 1445 gfs2_trans_add_unrevoke(sdp, block, 1);
1596 1446  
1597   - spin_lock(&sdp->sd_rindex_spin);
1598 1447 rgd->rd_free_clone--;
1599   - spin_unlock(&sdp->sd_rindex_spin);
1600 1448 trace_gfs2_block_alloc(dip, block, 1, GFS2_BLKST_DINODE);
1601 1449 *bn = block;
1602 1450 return 0;
... ... @@ -1629,8 +1477,6 @@
1629 1477 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1630 1478 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1631 1479  
1632   - gfs2_trans_add_rg(rgd);
1633   -
1634 1480 /* Directories keep their data in the metadata address space */
1635 1481 if (meta || ip->i_depth)
1636 1482 gfs2_meta_wipe(ip, bstart, blen);
... ... @@ -1666,7 +1512,6 @@
1666 1512 trace_gfs2_block_alloc(ip, blkno, 1, GFS2_BLKST_UNLINKED);
1667 1513 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1668 1514 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1669   - gfs2_trans_add_rg(rgd);
1670 1515 }
1671 1516  
1672 1517 static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
... ... @@ -1688,7 +1533,6 @@
1688 1533 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1689 1534  
1690 1535 gfs2_statfs_change(sdp, 0, +1, -1);
1691   - gfs2_trans_add_rg(rgd);
1692 1536 }
1693 1537  
1694 1538  
... ... @@ -25,12 +25,9 @@
25 25 extern void gfs2_clear_rgrpd(struct gfs2_sbd *sdp);
26 26 extern int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh);
27 27  
28   -extern int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd);
29   -extern void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd);
30   -extern void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd);
  28 +extern int gfs2_rgrp_go_lock(struct gfs2_holder *gh);
  29 +extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh);
31 30  
32   -extern void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd);
33   -
34 31 extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip);
35 32 static inline void gfs2_alloc_put(struct gfs2_inode *ip)
36 33 {
... ... @@ -72,6 +69,9 @@
72 69 extern void gfs2_rlist_free(struct gfs2_rgrp_list *rlist);
73 70 extern u64 gfs2_ri_total(struct gfs2_sbd *sdp);
74 71 extern int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl);
  72 +extern void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
  73 + struct buffer_head *bh,
  74 + const struct gfs2_bitmap *bi);
75 75  
76 76 #endif /* __RGRP_DOT_H__ */
... ... @@ -184,9 +184,4 @@
184 184 }
185 185 gfs2_log_unlock(sdp);
186 186 }
187   -
188   -void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd)
189   -{
190   - lops_add(rgd->rd_sbd, &rgd->rd_le);
191   -}
... ... @@ -34,15 +34,13 @@
34 34 al->al_requested + 1 : al->al_rgd->rd_length;
35 35 }
36 36  
37   -int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
38   - unsigned int revokes);
  37 +extern int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
  38 + unsigned int revokes);
39 39  
40   -void gfs2_trans_end(struct gfs2_sbd *sdp);
41   -
42   -void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta);
43   -void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd);
44   -void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len);
45   -void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd);
  40 +extern void gfs2_trans_end(struct gfs2_sbd *sdp);
  41 +extern void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta);
  42 +extern void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd);
  43 +extern void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len);
46 44  
47 45 #endif /* __TRANS_DOT_H__ */