Commit 60105e1248f571aa3b895cd63bef072ed9d90c77

Authored by Minchan Kim
Committed by Linus Torvalds
1 parent 6335b19344

mm/zswap: support multiple swap devices

Cai Liu reported that zbud pool page counting is broken when multiple
swap devices are used: only one swap device's pool is counted instead
of all of them, so zswap cannot control writeback properly.  The result
is unnecessary writeback, or no writeback when writeback is really
needed.

In other words, it made zswap behave erratically.
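
To make the accounting problem concrete, here is a minimal userspace
sketch (hypothetical stand-in types, not the kernel code): with one
pool per swap device, the global counter that the "is zswap full?"
check relies on only ever reflects the pool of the device being
touched at that moment.

    /* Toy model of the pre-patch accounting; "pool" and "tree" are
     * simplified stand-ins for zbud_pool and zswap_tree. */
    #include <stdio.h>

    #define MAX_SWAPFILES 2

    struct pool { unsigned long pages; };
    struct tree { struct pool pool; };

    static struct tree trees[MAX_SWAPFILES] = {
            { { 1900 } },   /* swap A: 19% of a 10000-page budget */
            { {  100 } },   /* swap B:  1% of the same budget     */
    };

    /* What the limit check compares against the pool-size limit. */
    static unsigned long zswap_pool_pages;

    /* Pre-patch behaviour: refreshed from the pool of the swap device
     * currently being stored to, so other devices' pages are invisible. */
    static void update_accounting(struct tree *tree)
    {
            zswap_pool_pages = tree->pool.pages;
    }

    int main(void)
    {
            update_accounting(&trees[1]);   /* VM is currently storing to B */
            printf("counted %lu pages, real usage %lu pages\n",
                   zswap_pool_pages,
                   trees[0].pool.pages + trees[1].pool.pages);
            return 0;
    }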

There is another problem in zswap:

For example, assume two swap devices A and B with different priorities.
A was charged up to 19% a long time ago and is now full, so the VM
starts using B, which has recently been charged 1%.  With the default
limit (zswap_max_pool_percent = 20), zswap is now considered full at
19% + 1%.  If the VM then wants to swap more pages out to B,
zbud_reclaim_page() evicts a page from B's pool, and this repeats
continuously.  This completely inverts the LRU order and causes swap
thrashing on B.
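
The eviction side of the same setup can be sketched the same way
(again a hypothetical userspace model, not the kernel code): because
reclaim is directed at the pool of the device currently being written,
it throws away B's freshly stored pages while A's much older pages are
never considered.

    /* Toy model of pre-patch reclaim: each device has its own pool and
     * its own LRU, and the store path reclaims from the current pool. */
    #include <stdio.h>

    struct pool {
            const char *name;
            unsigned long oldest_page_age;  /* age of this pool's LRU tail */
    };

    static void reclaim_from(struct pool *p)
    {
            printf("evicting from %s (oldest page age %lu)\n",
                   p->name, p->oldest_page_age);
    }

    int main(void)
    {
            struct pool a = { "A", 1000 };  /* filled long ago   */
            struct pool b = { "B",   10 };  /* filled just now   */

            /* The VM is swapping to B, so B's pool is the one reclaimed,
             * even though the globally oldest pages live in A. */
            reclaim_from(&b);
            return 0;
    }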

This patch makes zswap handle multiple swap devices by creating a
single zbud pool that is shared by all of them.  All zswap pages,
regardless of which swap device they came from, are then kept in one
LRU order, which prevents both problems described above.
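
The resulting layout can be summarised with the following sketch
(userspace stand-ins for the kernel types; the real definitions are in
the diff below):

    /* One zbud pool for all of zswap: pages from every swap device share
     * a single LRU, so reclaim always evicts the globally oldest page and
     * the pool size reflects total usage across devices. */
    struct zbud_pool;                       /* opaque compressed-page pool */

    static struct zbud_pool *zswap_pool;    /* shared by all zswap backends */

    /* Still one tree per swap device, but it no longer owns a pool. */
    struct rb_root { void *rb_node; };          /* stub for kernel rb_root   */
    typedef struct { int locked; } spinlock_t;  /* stub for kernel spinlock  */

    struct zswap_tree {
            struct rb_root rbroot;
            spinlock_t lock;
    };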

Signed-off-by: Minchan Kim <minchan@kernel.org>
Reported-by: Cai Liu <cai.liu@samsung.com>
Suggested-by: Weijie Yang <weijie.yang.kh@gmail.com>
Cc: Seth Jennings <sjennings@variantweb.net>
Reviewed-by: Bob Liu <bob.liu@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

 mm/zswap.c | 64 +++++++++++++++++++++++++++++++++++-------------------------------
 1 file changed, 33 insertions(+), 31 deletions(-)

@@ -89,6 +89,9 @@
 module_param_named(max_pool_percent,
                        zswap_max_pool_percent, uint, 0644);
 
+/* zbud_pool is shared by all of zswap backend */
+static struct zbud_pool *zswap_pool;
+
 /*********************************
 * compression functions
 **********************************/
@@ -189,7 +192,6 @@
 struct zswap_tree {
         struct rb_root rbroot;
         spinlock_t lock;
-        struct zbud_pool *pool;
 };
 
 static struct zswap_tree *zswap_trees[MAX_SWAPFILES];
@@ -285,13 +287,12 @@
  * Carries out the common pattern of freeing and entry's zbud allocation,
  * freeing the entry itself, and decrementing the number of stored pages.
  */
-static void zswap_free_entry(struct zswap_tree *tree,
-                        struct zswap_entry *entry)
+static void zswap_free_entry(struct zswap_entry *entry)
 {
-        zbud_free(tree->pool, entry->handle);
+        zbud_free(zswap_pool, entry->handle);
         zswap_entry_cache_free(entry);
         atomic_dec(&zswap_stored_pages);
-        zswap_pool_pages = zbud_get_pool_size(tree->pool);
+        zswap_pool_pages = zbud_get_pool_size(zswap_pool);
 }
 
 /* caller must hold the tree lock */
@@ -311,7 +312,7 @@
         BUG_ON(refcount < 0);
         if (refcount == 0) {
                 zswap_rb_erase(&tree->rbroot, entry);
-                zswap_free_entry(tree, entry);
+                zswap_free_entry(entry);
         }
 }
 
@@ -545,7 +546,6 @@
         zbud_unmap(pool, handle);
         tree = zswap_trees[swp_type(swpentry)];
         offset = swp_offset(swpentry);
-        BUG_ON(pool != tree->pool);
 
         /* find and ref zswap entry */
         spin_lock(&tree->lock);
@@ -573,13 +573,13 @@
         case ZSWAP_SWAPCACHE_NEW: /* page is locked */
                 /* decompress */
                 dlen = PAGE_SIZE;
-                src = (u8 *)zbud_map(tree->pool, entry->handle) +
+                src = (u8 *)zbud_map(zswap_pool, entry->handle) +
                         sizeof(struct zswap_header);
                 dst = kmap_atomic(page);
                 ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src,
                                 entry->length, dst, &dlen);
                 kunmap_atomic(dst);
-                zbud_unmap(tree->pool, entry->handle);
+                zbud_unmap(zswap_pool, entry->handle);
                 BUG_ON(ret);
                 BUG_ON(dlen != PAGE_SIZE);
 
@@ -652,7 +652,7 @@
         /* reclaim space if needed */
         if (zswap_is_full()) {
                 zswap_pool_limit_hit++;
-                if (zbud_reclaim_page(tree->pool, 8)) {
+                if (zbud_reclaim_page(zswap_pool, 8)) {
                         zswap_reject_reclaim_fail++;
                         ret = -ENOMEM;
                         goto reject;
@@ -679,7 +679,7 @@
 
         /* store */
         len = dlen + sizeof(struct zswap_header);
-        ret = zbud_alloc(tree->pool, len, __GFP_NORETRY | __GFP_NOWARN,
+        ret = zbud_alloc(zswap_pool, len, __GFP_NORETRY | __GFP_NOWARN,
                         &handle);
         if (ret == -ENOSPC) {
                 zswap_reject_compress_poor++;
@@ -689,11 +689,11 @@
                 zswap_reject_alloc_fail++;
                 goto freepage;
         }
-        zhdr = zbud_map(tree->pool, handle);
+        zhdr = zbud_map(zswap_pool, handle);
         zhdr->swpentry = swp_entry(type, offset);
         buf = (u8 *)(zhdr + 1);
         memcpy(buf, dst, dlen);
-        zbud_unmap(tree->pool, handle);
+        zbud_unmap(zswap_pool, handle);
         put_cpu_var(zswap_dstmem);
 
         /* populate entry */
@@ -716,7 +716,7 @@
 
         /* update stats */
         atomic_inc(&zswap_stored_pages);
-        zswap_pool_pages = zbud_get_pool_size(tree->pool);
+        zswap_pool_pages = zbud_get_pool_size(zswap_pool);
 
         return 0;
 
@@ -752,13 +752,13 @@
 
         /* decompress */
         dlen = PAGE_SIZE;
-        src = (u8 *)zbud_map(tree->pool, entry->handle) +
+        src = (u8 *)zbud_map(zswap_pool, entry->handle) +
                         sizeof(struct zswap_header);
         dst = kmap_atomic(page);
         ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, entry->length,
                         dst, &dlen);
         kunmap_atomic(dst);
-        zbud_unmap(tree->pool, entry->handle);
+        zbud_unmap(zswap_pool, entry->handle);
         BUG_ON(ret);
 
         spin_lock(&tree->lock);
@@ -804,11 +804,9 @@
         /* walk the tree and free everything */
         spin_lock(&tree->lock);
         rbtree_postorder_for_each_entry_safe(entry, n, &tree->rbroot, rbnode)
-                zswap_free_entry(tree, entry);
+                zswap_free_entry(entry);
         tree->rbroot = RB_ROOT;
         spin_unlock(&tree->lock);
-
-        zbud_destroy_pool(tree->pool);
         kfree(tree);
         zswap_trees[type] = NULL;
 }
@@ -822,20 +820,14 @@
         struct zswap_tree *tree;
 
         tree = kzalloc(sizeof(struct zswap_tree), GFP_KERNEL);
-        if (!tree)
-                goto err;
-        tree->pool = zbud_create_pool(GFP_KERNEL, &zswap_zbud_ops);
-        if (!tree->pool)
-                goto freetree;
+        if (!tree) {
+                pr_err("alloc failed, zswap disabled for swap type %d\n", type);
+                return;
+        }
+
         tree->rbroot = RB_ROOT;
         spin_lock_init(&tree->lock);
         zswap_trees[type] = tree;
-        return;
-
-freetree:
-        kfree(tree);
-err:
-        pr_err("alloc failed, zswap disabled for swap type %d\n", type);
 }
 
 static struct frontswap_ops zswap_frontswap_ops = {
@@ -907,9 +899,16 @@
                 return 0;
 
         pr_info("loading zswap\n");
+
+        zswap_pool = zbud_create_pool(GFP_KERNEL, &zswap_zbud_ops);
+        if (!zswap_pool) {
+                pr_err("zbud pool creation failed\n");
+                goto error;
+        }
+
         if (zswap_entry_cache_create()) {
                 pr_err("entry cache creation failed\n");
-                goto error;
+                goto cachefail;
         }
         if (zswap_comp_init()) {
                 pr_err("compressor initialization failed\n");
@@ -919,6 +918,7 @@
                 pr_err("per-cpu initialization failed\n");
                 goto pcpufail;
         }
+
         frontswap_register_ops(&zswap_frontswap_ops);
         if (zswap_debugfs_init())
                 pr_warn("debugfs initialization failed\n");
@@ -927,6 +927,8 @@
         zswap_comp_exit();
 compfail:
         zswap_entry_cache_destory();
+cachefail:
+        zbud_destroy_pool(zswap_pool);
 error:
         return -ENOMEM;
 }