Commit 08eee69fcf6baea543a2b4d2a2fcba0e61aa3160

Authored by Minchan Kim
Committed by Linus Torvalds
1 parent 2b269ce6fc

zram: remove init_lock in zram_make_request

An admin can reset zram while I/O is still in flight, so the I/O path
has taken zram->init_lock as a read-side lock to keep the zram meta
from being freed underneath it.

However, the init_lock is really troublesome.  We can't call
zram_meta_alloc under init_lock without a lockdep splat, because
zram_rw_page is one of the functions on the reclaim path and takes
init_lock as a read lock, while other places in process context take
it as a write lock.  So we moved the allocation outside the lock to
avoid the lockdep warning, which hurt readability, and finally I hit
yet another lockdep splat between init_lock and cpu_hotplug, via
kmem_cache_destroy, while working on zsmalloc compaction.  :(

The ideal is to remove the horrible init_lock from zram's rw path
altogether.  This patch does that and instead adds an atomic refcount
for meta lifetime management and a waitqueue so the meta can be freed
in process context.  Freeing the meta in process context matters
because some of the resource destruction needs a mutex that could
already be held if we released the resource in reclaim context, which
would deadlock us again.
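
The idea, as a rough userspace sketch (the names dev, meta_get,
meta_put, reset_device and the pthread condition variable are
illustrative stand-ins only, not the kernel code, which uses
atomic_inc_not_zero() and wait_event() as the diff below shows):

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdlib.h>

struct dev {
        atomic_int refcount;            /* plays the role of zram->refcount */
        pthread_mutex_t lock;
        pthread_cond_t io_done;         /* plays the role of zram->io_done */
        int *meta;                      /* plays the role of zram->meta */
};

/* Userspace stand-in for atomic_inc_not_zero(): succeed only while > 0. */
static bool meta_get(struct dev *d)
{
        int old = atomic_load(&d->refcount);

        while (old != 0)
                if (atomic_compare_exchange_weak(&d->refcount, &old, old + 1))
                        return true;
        return false;
}

static void meta_put(struct dev *d)
{
        if (atomic_fetch_sub(&d->refcount, 1) == 1) {
                /* last reference dropped: wake the waiting reset path */
                pthread_mutex_lock(&d->lock);
                pthread_cond_signal(&d->io_done);
                pthread_mutex_unlock(&d->lock);
        }
}

static void rw_path(struct dev *d)
{
        if (!meta_get(d))
                return;                 /* device is being reset; bail out */
        /* ... meta is guaranteed to stay alive here ... */
        meta_put(d);
}

static void reset_device(struct dev *d)
{
        int *meta = d->meta;

        meta_put(d);                    /* drop the initial reference */
        pthread_mutex_lock(&d->lock);
        while (atomic_load(&d->refcount) != 0)
                pthread_cond_wait(&d->io_done, &d->lock);
        pthread_mutex_unlock(&d->lock);

        free(meta);                     /* no in-flight I/O can see it now */
}

int main(void)
{
        struct dev d = { .meta = malloc(sizeof(int)) };

        atomic_init(&d.refcount, 1);    /* initial ref, as the init path sets */
        pthread_mutex_init(&d.lock, NULL);
        pthread_cond_init(&d.io_done, NULL);

        rw_path(&d);                    /* normally runs concurrently with reset */
        reset_device(&d);
        return 0;
}

The rw path only touches the meta between a successful get and the
matching put, so once the reset path's wait returns nothing can still
be using the meta and it is safe to free it from process context.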

As a bonus, we can drop the init_done check from the rw path because
zram_meta_get now takes over that role.

Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Signed-off-by: Minchan Kim <minchan@kernel.org>
Cc: Nitin Gupta <ngupta@vflare.org>
Cc: Jerome Marchand <jmarchan@redhat.com>
Cc: Ganesh Mahendran <opensource.ganesh@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 2 changed files with 64 additions and 32 deletions

drivers/block/zram/zram_drv.c
... ... @@ -53,9 +53,9 @@
53 53 } \
54 54 static DEVICE_ATTR_RO(name);
55 55  
56   -static inline int init_done(struct zram *zram)
  56 +static inline bool init_done(struct zram *zram)
57 57 {
58   - return zram->meta != NULL;
  58 + return zram->disksize;
59 59 }
60 60  
61 61 static inline struct zram *dev_to_zram(struct device *dev)
... ... @@ -356,6 +356,18 @@
356 356 return NULL;
357 357 }
358 358  
  359 +static inline bool zram_meta_get(struct zram *zram)
  360 +{
  361 + if (atomic_inc_not_zero(&zram->refcount))
  362 + return true;
  363 + return false;
  364 +}
  365 +
  366 +static inline void zram_meta_put(struct zram *zram)
  367 +{
  368 + atomic_dec(&zram->refcount);
  369 +}
  370 +
359 371 static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
360 372 {
361 373 if (*offset + bvec->bv_len >= PAGE_SIZE)
... ... @@ -717,6 +729,10 @@
717 729  
718 730 static void zram_reset_device(struct zram *zram)
719 731 {
  732 + struct zram_meta *meta;
  733 + struct zcomp *comp;
  734 + u64 disksize;
  735 +
720 736 down_write(&zram->init_lock);
721 737  
722 738 zram->limit_pages = 0;
... ... @@ -726,16 +742,31 @@
726 742 return;
727 743 }
728 744  
729   - zcomp_destroy(zram->comp);
730   - zram->max_comp_streams = 1;
731   - zram_meta_free(zram->meta, zram->disksize);
732   - zram->meta = NULL;
  745 + meta = zram->meta;
  746 + comp = zram->comp;
  747 + disksize = zram->disksize;
  748 + /*
  749 + * Refcount will go down to 0 eventually and r/w handler
  750 + * cannot handle further I/O so it will bail out by
  751 + * check zram_meta_get.
  752 + */
  753 + zram_meta_put(zram);
  754 + /*
  755 + * We want to free zram_meta in process context to avoid
  756 + * deadlock between reclaim path and any other locks.
  757 + */
  758 + wait_event(zram->io_done, atomic_read(&zram->refcount) == 0);
  759 +
733 760 /* Reset stats */
734 761 memset(&zram->stats, 0, sizeof(zram->stats));
735 762 zram->disksize = 0;
  763 + zram->max_comp_streams = 1;
736 764 set_capacity(zram->disk, 0);
737 765  
738 766 up_write(&zram->init_lock);
  767 + /* I/O operation under all of CPU are done so let's free */
  768 + zram_meta_free(meta, disksize);
  769 + zcomp_destroy(comp);
739 770 }
740 771  
741 772 static ssize_t disksize_store(struct device *dev,
... ... @@ -771,6 +802,8 @@
771 802 goto out_destroy_comp;
772 803 }
773 804  
  805 + init_waitqueue_head(&zram->io_done);
  806 + atomic_set(&zram->refcount, 1);
774 807 zram->meta = meta;
775 808 zram->comp = comp;
776 809 zram->disksize = disksize;
... ... @@ -901,23 +934,21 @@
901 934 {
902 935 struct zram *zram = queue->queuedata;
903 936  
904   - down_read(&zram->init_lock);
905   - if (unlikely(!init_done(zram)))
  937 + if (unlikely(!zram_meta_get(zram)))
906 938 goto error;
907 939  
908 940 if (!valid_io_request(zram, bio->bi_iter.bi_sector,
909 941 bio->bi_iter.bi_size)) {
910 942 atomic64_inc(&zram->stats.invalid_io);
911   - goto error;
  943 + goto put_zram;
912 944 }
913 945  
914 946 __zram_make_request(zram, bio);
915   - up_read(&zram->init_lock);
916   -
  947 + zram_meta_put(zram);
917 948 return;
918   -
  949 +put_zram:
  950 + zram_meta_put(zram);
919 951 error:
920   - up_read(&zram->init_lock);
921 952 bio_io_error(bio);
922 953 }
923 954  
... ... @@ -939,23 +970,21 @@
939 970 static int zram_rw_page(struct block_device *bdev, sector_t sector,
940 971 struct page *page, int rw)
941 972 {
942   - int offset, err;
  973 + int offset, err = -EIO;
943 974 u32 index;
944 975 struct zram *zram;
945 976 struct bio_vec bv;
946 977  
947 978 zram = bdev->bd_disk->private_data;
  979 + if (unlikely(!zram_meta_get(zram)))
  980 + goto out;
  981 +
948 982 if (!valid_io_request(zram, sector, PAGE_SIZE)) {
949 983 atomic64_inc(&zram->stats.invalid_io);
950   - return -EINVAL;
  984 + err = -EINVAL;
  985 + goto put_zram;
951 986 }
952 987  
953   - down_read(&zram->init_lock);
954   - if (unlikely(!init_done(zram))) {
955   - err = -EIO;
956   - goto out_unlock;
957   - }
958   -
959 988 index = sector >> SECTORS_PER_PAGE_SHIFT;
960 989 offset = sector & (SECTORS_PER_PAGE - 1) << SECTOR_SHIFT;
961 990  
... ... @@ -964,8 +993,9 @@
964 993 bv.bv_offset = 0;
965 994  
966 995 err = zram_bvec_rw(zram, &bv, index, offset, rw);
967   -out_unlock:
968   - up_read(&zram->init_lock);
  996 +put_zram:
  997 + zram_meta_put(zram);
  998 +out:
969 999 /*
970 1000 * If I/O fails, just return error(ie, non-zero) without
971 1001 * calling page_endio.
drivers/block/zram/zram_drv.h
... ... @@ -100,24 +100,26 @@
100 100  
101 101 struct zram {
102 102 struct zram_meta *meta;
  103 + struct zcomp *comp;
103 104 struct request_queue *queue;
104 105 struct gendisk *disk;
105   - struct zcomp *comp;
106   -
107   - /* Prevent concurrent execution of device init, reset and R/W request */
  106 + /* Prevent concurrent execution of device init */
108 107 struct rw_semaphore init_lock;
109 108 /*
110   - * This is the limit on amount of *uncompressed* worth of data
111   - * we can store in a disk.
  109 + * the number of pages zram can consume for storing compressed data
112 110 */
113   - u64 disksize; /* bytes */
  111 + unsigned long limit_pages;
114 112 int max_comp_streams;
  113 +
115 114 struct zram_stats stats;
  115 + atomic_t refcount; /* refcount for zram_meta */
  116 + /* wait all IO under all of cpu are done */
  117 + wait_queue_head_t io_done;
116 118 /*
117   - * the number of pages zram can consume for storing compressed data
  119 + * This is the limit on amount of *uncompressed* worth of data
  120 + * we can store in a disk.
118 121 */
119   - unsigned long limit_pages;
120   -
  122 + u64 disksize; /* bytes */
121 123 char compressor[10];
122 124 };
123 125 #endif