Commit 574c3fdae3890e60f8bc59e8107686944ba1e446

Authored by Linus Torvalds

Merge branch 'linux-next' of git://git.infradead.org/~dedekind/ubi-2.6

* 'linux-next' of git://git.infradead.org/~dedekind/ubi-2.6:
  UBI: fix checkpatch.pl warnings
  UBI: simplify PEB protection code
  UBI: prepare for protection tree improvements
  UBI: return -ENOMEM upon failing vmalloc
  UBI: document UBI ioctls
  UBI: handle write errors in WL worker
  UBI: fix error path
  UBI: some code re-structuring
  UBI: fix deadlock
  UBI: fix warnings when debugging is enabled

Showing 8 changed files

Documentation/ioctl/ioctl-number.txt
... ... @@ -97,6 +97,7 @@
97 97 <http://linux01.gwdg.de/~alatham/ppdd.html>
98 98 'M' all linux/soundcard.h
99 99 'N' 00-1F drivers/usb/scanner.h
  100 +'O' 00-02 include/mtd/ubi-user.h UBI
100 101 'P' all linux/soundcard.h
101 102 'Q' all linux/soundcard.h
102 103 'R' 00-1F linux/random.h
... ... @@ -142,6 +143,9 @@
142 143 'n' 00-7F linux/ncp_fs.h
143 144 'n' E0-FF video/matrox.h matroxfb
144 145 'o' 00-1F fs/ocfs2/ocfs2_fs.h OCFS2
  146 +'o' 00-03 include/mtd/ubi-user.h conflict! (OCFS2 and UBI overlaps)
  147 +'o' 40-41 include/mtd/ubi-user.h UBI
  148 +'o' 01-A1 include/linux/dvb/*.h DVB
145 149 'p' 00-0F linux/phantom.h conflict! (OpenHaptics needs this)
146 150 'p' 00-3F linux/mc146818rtc.h conflict!
147 151 'p' 40-7F linux/nvram.h
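
The new table entries reserve ioctl magic 'O' (commands 00-02) and 'o' (00-03 and 40-41) for the UBI ioctls declared in include/mtd/ubi-user.h. As a rough, hypothetical sketch of what such a reservation corresponds to in code (the macro names and request structure below are illustrative, not the real UBI definitions):

    #include <linux/ioctl.h>
    #include <linux/types.h>

    /* Magic registered in Documentation/ioctl/ioctl-number.txt */
    #define EXAMPLE_IOC_MAGIC 'O'

    struct example_req {
    	__s32 vol_id;
    	__s64 bytes;
    };

    /* Command numbers 0..2 must stay inside the range claimed in the table */
    #define EXAMPLE_IOC_FIRST   _IOW(EXAMPLE_IOC_MAGIC, 0, __s64)
    #define EXAMPLE_IOC_SECOND  _IO(EXAMPLE_IOC_MAGIC, 1)
    #define EXAMPLE_IOC_THIRD   _IOW(EXAMPLE_IOC_MAGIC, 2, struct example_req)
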
drivers/mtd/ubi/build.c
... ... @@ -815,19 +815,20 @@
815 815 if (err)
816 816 goto out_free;
817 817  
  818 + err = -ENOMEM;
818 819 ubi->peb_buf1 = vmalloc(ubi->peb_size);
819 820 if (!ubi->peb_buf1)
820 821 goto out_free;
821 822  
822 823 ubi->peb_buf2 = vmalloc(ubi->peb_size);
823 824 if (!ubi->peb_buf2)
824   - goto out_free;
  825 + goto out_free;
825 826  
826 827 #ifdef CONFIG_MTD_UBI_DEBUG
827 828 mutex_init(&ubi->dbg_buf_mutex);
828 829 ubi->dbg_peb_buf = vmalloc(ubi->peb_size);
829 830 if (!ubi->dbg_peb_buf)
830   - goto out_free;
  831 + goto out_free;
831 832 #endif
832 833  
833 834 err = attach_by_scanning(ubi);
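
The build.c hunk pre-loads err with -ENOMEM once, so each of the following vmalloc() calls can share the single out_free label and still return the correct error code. A minimal standalone sketch of the same pattern, with hypothetical names:

    #include <linux/errno.h>
    #include <linux/vmalloc.h>

    struct my_dev {
    	void *buf1;
    	void *buf2;
    };

    static int my_dev_alloc_bufs(struct my_dev *d, size_t size)
    {
    	int err;

    	/* Pre-set the error code: every failed allocation below takes the
    	 * same label and returns -ENOMEM without further assignments. */
    	err = -ENOMEM;
    	d->buf1 = vmalloc(size);
    	if (!d->buf1)
    		goto out_free;

    	d->buf2 = vmalloc(size);
    	if (!d->buf2)
    		goto out_free;

    	return 0;

    out_free:
    	vfree(d->buf1);	/* vfree(NULL) is a no-op, so this is safe */
    	vfree(d->buf2);
    	return err;
    }
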
drivers/mtd/ubi/cdev.c
... ... @@ -721,7 +721,8 @@
721 721 * It seems we need to remove volume with name @re->new_name,
722 722 * if it exists.
723 723 */
724   - desc = ubi_open_volume_nm(ubi->ubi_num, re->new_name, UBI_EXCLUSIVE);
  724 + desc = ubi_open_volume_nm(ubi->ubi_num, re->new_name,
  725 + UBI_EXCLUSIVE);
725 726 if (IS_ERR(desc)) {
726 727 err = PTR_ERR(desc);
727 728 if (err == -ENODEV)
drivers/mtd/ubi/debug.h
... ... @@ -27,11 +27,11 @@
27 27 #define dbg_err(fmt, ...) ubi_err(fmt, ##__VA_ARGS__)
28 28  
29 29 #define ubi_assert(expr) do { \
30   - if (unlikely(!(expr))) { \
31   - printk(KERN_CRIT "UBI assert failed in %s at %u (pid %d)\n", \
32   - __func__, __LINE__, current->pid); \
33   - ubi_dbg_dump_stack(); \
34   - } \
  30 + if (unlikely(!(expr))) { \
  31 + printk(KERN_CRIT "UBI assert failed in %s at %u (pid %d)\n", \
  32 + __func__, __LINE__, current->pid); \
  33 + ubi_dbg_dump_stack(); \
  34 + } \
35 35 } while (0)
36 36  
37 37 #define dbg_msg(fmt, ...) \
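
The re-indented ubi_assert() keeps its do { ... } while (0) wrapper, which makes the multi-statement macro behave like a single statement. A small illustration (hypothetical function, not from the patch) of why that matters:

    static void check_buffer(const void *buf, int len)
    {
    	/* Because ubi_assert() expands to one do { } while (0) statement,
    	 * it can sit in an un-braced if/else like any other statement
    	 * without breaking the else binding. */
    	if (buf)
    		ubi_assert(len > 0);
    	else
    		ubi_assert(len == 0);
    }
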
drivers/mtd/ubi/eba.c
... ... @@ -504,12 +504,9 @@
504 504 if (!vid_hdr)
505 505 return -ENOMEM;
506 506  
507   - mutex_lock(&ubi->buf_mutex);
508   -
509 507 retry:
510 508 new_pnum = ubi_wl_get_peb(ubi, UBI_UNKNOWN);
511 509 if (new_pnum < 0) {
512   - mutex_unlock(&ubi->buf_mutex);
513 510 ubi_free_vid_hdr(ubi, vid_hdr);
514 511 return new_pnum;
515 512 }

... ... @@ -529,20 +526,23 @@
529 526 goto write_error;
530 527  
531 528 data_size = offset + len;
  529 + mutex_lock(&ubi->buf_mutex);
532 530 memset(ubi->peb_buf1 + offset, 0xFF, len);
533 531  
534 532 /* Read everything before the area where the write failure happened */
535 533 if (offset > 0) {
536 534 err = ubi_io_read_data(ubi, ubi->peb_buf1, pnum, 0, offset);
537 535 if (err && err != UBI_IO_BITFLIPS)
538   - goto out_put;
  536 + goto out_unlock;
539 537 }
540 538  
541 539 memcpy(ubi->peb_buf1 + offset, buf, len);
542 540  
543 541 err = ubi_io_write_data(ubi, ubi->peb_buf1, new_pnum, 0, data_size);
544   - if (err)
  542 + if (err) {
  543 + mutex_unlock(&ubi->buf_mutex);
545 544 goto write_error;
  545 + }
546 546  
547 547 mutex_unlock(&ubi->buf_mutex);
548 548 ubi_free_vid_hdr(ubi, vid_hdr);
549 549  
... ... @@ -553,8 +553,9 @@
553 553 ubi_msg("data was successfully recovered");
554 554 return 0;
555 555  
556   -out_put:
  556 +out_unlock:
557 557 mutex_unlock(&ubi->buf_mutex);
  558 +out_put:
558 559 ubi_wl_put_peb(ubi, new_pnum, 1);
559 560 ubi_free_vid_hdr(ubi, vid_hdr);
560 561 return err;
... ... @@ -567,7 +568,6 @@
567 568 ubi_warn("failed to write to PEB %d", new_pnum);
568 569 ubi_wl_put_peb(ubi, new_pnum, 1);
569 570 if (++tries > UBI_IO_RETRIES) {
570   - mutex_unlock(&ubi->buf_mutex);
571 571 ubi_free_vid_hdr(ubi, vid_hdr);
572 572 return err;
573 573 }
... ... @@ -949,10 +949,14 @@
949 949 * This function copies logical eraseblock from physical eraseblock @from to
950 950 * physical eraseblock @to. The @vid_hdr buffer may be changed by this
951 951 * function. Returns:
952   - * o %0 in case of success;
953   - * o %1 if the operation was canceled and should be tried later (e.g.,
954   - * because a bit-flip was detected at the target PEB);
955   - * o %2 if the volume is being deleted and this LEB should not be moved.
  952 + * o %0 in case of success;
  953 + * o %1 if the operation was canceled because the volume is being deleted
  954 + * or because the PEB was put meanwhile;
  955 + * o %2 if the operation was canceled because there was a write error to the
  956 + * target PEB;
  957 + * o %-EAGAIN if the operation was canceled because a bit-flip was detected
  958 + * in the target PEB;
  959 + * o a negative error code in case of failure.
956 960 */
957 961 int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
958 962 struct ubi_vid_hdr *vid_hdr)
... ... @@ -978,7 +982,7 @@
978 982 /*
979 983 * Note, we may race with volume deletion, which means that the volume
980 984 * this logical eraseblock belongs to might be being deleted. Since the
981   - * volume deletion unmaps all the volume's logical eraseblocks, it will
  985 + * volume deletion un-maps all the volume's logical eraseblocks, it will
982 986 * be locked in 'ubi_wl_put_peb()' and wait for the WL worker to finish.
983 987 */
984 988 vol = ubi->volumes[idx];
... ... @@ -986,7 +990,7 @@
986 990 /* No need to do further work, cancel */
987 991 dbg_eba("volume %d is being removed, cancel", vol_id);
988 992 spin_unlock(&ubi->volumes_lock);
989   - return 2;
  993 + return 1;
990 994 }
991 995 spin_unlock(&ubi->volumes_lock);
992 996  
... ... @@ -1023,7 +1027,7 @@
1023 1027  
1024 1028 /*
1025 1029 * OK, now the LEB is locked and we can safely start moving it. Since
1026   - * this function utilizes thie @ubi->peb1_buf buffer which is shared
  1030 + * this function utilizes the @ubi->peb1_buf buffer which is shared
1027 1031 * with some other functions, so lock the buffer by taking the
1028 1032 * @ubi->buf_mutex.
1029 1033 */
1030 1034  
... ... @@ -1068,8 +1072,11 @@
1068 1072 vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi));
1069 1073  
1070 1074 err = ubi_io_write_vid_hdr(ubi, to, vid_hdr);
1071   - if (err)
  1075 + if (err) {
  1076 + if (err == -EIO)
  1077 + err = 2;
1072 1078 goto out_unlock_buf;
  1079 + }
1073 1080  
1074 1081 cond_resched();
1075 1082  
1076 1083  
1077 1084  
... ... @@ -1079,14 +1086,17 @@
1079 1086 if (err != UBI_IO_BITFLIPS)
1080 1087 ubi_warn("cannot read VID header back from PEB %d", to);
1081 1088 else
1082   - err = 1;
  1089 + err = -EAGAIN;
1083 1090 goto out_unlock_buf;
1084 1091 }
1085 1092  
1086 1093 if (data_size > 0) {
1087 1094 err = ubi_io_write_data(ubi, ubi->peb_buf1, to, 0, aldata_size);
1088   - if (err)
  1095 + if (err) {
  1096 + if (err == -EIO)
  1097 + err = 2;
1089 1098 goto out_unlock_buf;
  1099 + }
1090 1100  
1091 1101 cond_resched();
1092 1102  
1093 1103  
... ... @@ -1101,15 +1111,16 @@
1101 1111 ubi_warn("cannot read data back from PEB %d",
1102 1112 to);
1103 1113 else
1104   - err = 1;
  1114 + err = -EAGAIN;
1105 1115 goto out_unlock_buf;
1106 1116 }
1107 1117  
1108 1118 cond_resched();
1109 1119  
1110 1120 if (memcmp(ubi->peb_buf1, ubi->peb_buf2, aldata_size)) {
1111   - ubi_warn("read data back from PEB %d - it is different",
1112   - to);
  1121 + ubi_warn("read data back from PEB %d and it is "
  1122 + "different", to);
  1123 + err = -EINVAL;
1113 1124 goto out_unlock_buf;
1114 1125 }
1115 1126 }
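
The updated kerneldoc above distinguishes %0, %1, %2, %-EAGAIN and negative error codes returned by 'ubi_eba_copy_leb()'. A sketch of how a caller might dispatch on them, mirroring the wear-leveling worker changes in wl.c further down; the helper names are hypothetical and declared only to keep the sketch self-contained:

    static int retry_move_later(struct ubi_device *ubi, int from, int to);
    static int keep_source_peb(struct ubi_device *ubi, int from, int to);
    static int torture_target_peb(struct ubi_device *ubi, int to);

    static int move_one_leb(struct ubi_device *ubi, int from, int to,
    			struct ubi_vid_hdr *vid_hdr)
    {
    	int err = ubi_eba_copy_leb(ubi, from, to, vid_hdr);

    	if (err == -EAGAIN)
    		return retry_move_later(ubi, from, to); /* bit-flip on target */
    	if (err < 0)
    		return err;                             /* real failure */
    	if (err == 1)
    		return keep_source_peb(ubi, from, to);  /* volume deleted or LEB put */
    	if (err == 2)
    		return torture_target_peb(ubi, to);     /* write error on target PEB */
    	return 0;                                       /* LEB copied successfully */
    }
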
drivers/mtd/ubi/io.c
... ... @@ -637,8 +637,6 @@
637 637  
638 638 dbg_io("read EC header from PEB %d", pnum);
639 639 ubi_assert(pnum >= 0 && pnum < ubi->peb_count);
640   - if (UBI_IO_DEBUG)
641   - verbose = 1;
642 640  
643 641 err = ubi_io_read(ubi, ec_hdr, pnum, 0, UBI_EC_HDR_SIZE);
644 642 if (err) {
... ... @@ -685,6 +683,9 @@
685 683 if (verbose)
686 684 ubi_warn("no EC header found at PEB %d, "
687 685 "only 0xFF bytes", pnum);
  686 + else if (UBI_IO_DEBUG)
  687 + dbg_msg("no EC header found at PEB %d, "
  688 + "only 0xFF bytes", pnum);
688 689 return UBI_IO_PEB_EMPTY;
689 690 }
690 691  
... ... @@ -696,7 +697,9 @@
696 697 ubi_warn("bad magic number at PEB %d: %08x instead of "
697 698 "%08x", pnum, magic, UBI_EC_HDR_MAGIC);
698 699 ubi_dbg_dump_ec_hdr(ec_hdr);
699   - }
  700 + } else if (UBI_IO_DEBUG)
  701 + dbg_msg("bad magic number at PEB %d: %08x instead of "
  702 + "%08x", pnum, magic, UBI_EC_HDR_MAGIC);
700 703 return UBI_IO_BAD_EC_HDR;
701 704 }
702 705  
... ... @@ -708,7 +711,9 @@
708 711 ubi_warn("bad EC header CRC at PEB %d, calculated "
709 712 "%#08x, read %#08x", pnum, crc, hdr_crc);
710 713 ubi_dbg_dump_ec_hdr(ec_hdr);
711   - }
  714 + } else if (UBI_IO_DEBUG)
  715 + dbg_msg("bad EC header CRC at PEB %d, calculated "
  716 + "%#08x, read %#08x", pnum, crc, hdr_crc);
712 717 return UBI_IO_BAD_EC_HDR;
713 718 }
714 719  
... ... @@ -912,8 +917,6 @@
912 917  
913 918 dbg_io("read VID header from PEB %d", pnum);
914 919 ubi_assert(pnum >= 0 && pnum < ubi->peb_count);
915   - if (UBI_IO_DEBUG)
916   - verbose = 1;
917 920  
918 921 p = (char *)vid_hdr - ubi->vid_hdr_shift;
919 922 err = ubi_io_read(ubi, p, pnum, ubi->vid_hdr_aloffset,
... ... @@ -960,6 +963,9 @@
960 963 if (verbose)
961 964 ubi_warn("no VID header found at PEB %d, "
962 965 "only 0xFF bytes", pnum);
  966 + else if (UBI_IO_DEBUG)
  967 + dbg_msg("no VID header found at PEB %d, "
  968 + "only 0xFF bytes", pnum);
963 969 return UBI_IO_PEB_FREE;
964 970 }
965 971  
... ... @@ -971,7 +977,9 @@
971 977 ubi_warn("bad magic number at PEB %d: %08x instead of "
972 978 "%08x", pnum, magic, UBI_VID_HDR_MAGIC);
973 979 ubi_dbg_dump_vid_hdr(vid_hdr);
974   - }
  980 + } else if (UBI_IO_DEBUG)
  981 + dbg_msg("bad magic number at PEB %d: %08x instead of "
  982 + "%08x", pnum, magic, UBI_VID_HDR_MAGIC);
975 983 return UBI_IO_BAD_VID_HDR;
976 984 }
977 985  
... ... @@ -983,7 +991,9 @@
983 991 ubi_warn("bad CRC at PEB %d, calculated %#08x, "
984 992 "read %#08x", pnum, crc, hdr_crc);
985 993 ubi_dbg_dump_vid_hdr(vid_hdr);
986   - }
  994 + } else if (UBI_IO_DEBUG)
  995 + dbg_msg("bad CRC at PEB %d, calculated %#08x, "
  996 + "read %#08x", pnum, crc, hdr_crc);
987 997 return UBI_IO_BAD_VID_HDR;
988 998 }
989 999  
... ... @@ -1024,7 +1034,7 @@
1024 1034  
1025 1035 err = paranoid_check_peb_ec_hdr(ubi, pnum);
1026 1036 if (err)
1027   - return err > 0 ? -EINVAL: err;
  1037 + return err > 0 ? -EINVAL : err;
1028 1038  
1029 1039 vid_hdr->magic = cpu_to_be32(UBI_VID_HDR_MAGIC);
1030 1040 vid_hdr->version = UBI_VERSION;
drivers/mtd/ubi/ubi.h
... ... @@ -74,6 +74,13 @@
74 74 #define UBI_IO_RETRIES 3
75 75  
76 76 /*
  77 + * Length of the protection queue. The length is effectively equivalent to the
  78 + * number of (global) erase cycles PEBs are protected from the wear-leveling
  79 + * worker.
  80 + */
  81 +#define UBI_PROT_QUEUE_LEN 10
  82 +
  83 +/*
77 84 * Error codes returned by the I/O sub-system.
78 85 *
79 86 * UBI_IO_PEB_EMPTY: the physical eraseblock is empty, i.e. it contains only
... ... @@ -95,7 +102,8 @@
95 102  
96 103 /**
97 104 * struct ubi_wl_entry - wear-leveling entry.
98   - * @rb: link in the corresponding RB-tree
  105 + * @u.rb: link in the corresponding (free/used) RB-tree
  106 + * @u.list: link in the protection queue
99 107 * @ec: erase counter
100 108 * @pnum: physical eraseblock number
101 109 *
... ... @@ -104,7 +112,10 @@
104 112 * RB-trees. See WL sub-system for details.
105 113 */
106 114 struct ubi_wl_entry {
107   - struct rb_node rb;
  115 + union {
  116 + struct rb_node rb;
  117 + struct list_head list;
  118 + } u;
108 119 int ec;
109 120 int pnum;
110 121 };
... ... @@ -288,7 +299,7 @@
288 299 * @beb_rsvd_level: normal level of PEBs reserved for bad PEB handling
289 300 *
290 301 * @autoresize_vol_id: ID of the volume which has to be auto-resized at the end
291   - * of UBI ititializetion
  302 + * of UBI initialization
292 303 * @vtbl_slots: how many slots are available in the volume table
293 304 * @vtbl_size: size of the volume table in bytes
294 305 * @vtbl: in-RAM volume table copy
... ... @@ -306,18 +317,17 @@
306 317 * @used: RB-tree of used physical eraseblocks
307 318 * @free: RB-tree of free physical eraseblocks
308 319 * @scrub: RB-tree of physical eraseblocks which need scrubbing
309   - * @prot: protection trees
310   - * @prot.pnum: protection tree indexed by physical eraseblock numbers
311   - * @prot.aec: protection tree indexed by absolute erase counter value
312   - * @wl_lock: protects the @used, @free, @prot, @lookuptbl, @abs_ec, @move_from,
313   - * @move_to, @move_to_put @erase_pending, @wl_scheduled, and @works
314   - * fields
  320 + * @pq: protection queue (contain physical eraseblocks which are temporarily
  321 + * protected from the wear-leveling worker)
  322 + * @pq_head: protection queue head
  323 + * @wl_lock: protects the @used, @free, @pq, @pq_head, @lookuptbl, @move_from,
  324 + * @move_to, @move_to_put @erase_pending, @wl_scheduled and @works
  325 + * fields
315 326 * @move_mutex: serializes eraseblock moves
316   - * @work_sem: sycnhronizes the WL worker with use tasks
  327 + * @work_sem: synchronizes the WL worker with use tasks
317 328 * @wl_scheduled: non-zero if the wear-leveling was scheduled
318 329 * @lookuptbl: a table to quickly find a &struct ubi_wl_entry object for any
319 330 * physical eraseblock
320   - * @abs_ec: absolute erase counter
321 331 * @move_from: physical eraseblock from where the data is being moved
322 332 * @move_to: physical eraseblock where the data is being moved to
323 333 * @move_to_put: if the "to" PEB was put
... ... @@ -351,11 +361,11 @@
351 361 *
352 362 * @peb_buf1: a buffer of PEB size used for different purposes
353 363 * @peb_buf2: another buffer of PEB size used for different purposes
354   - * @buf_mutex: proptects @peb_buf1 and @peb_buf2
  364 + * @buf_mutex: protects @peb_buf1 and @peb_buf2
355 365 * @ckvol_mutex: serializes static volume checking when opening
356   - * @mult_mutex: serializes operations on multiple volumes, like re-nameing
  366 + * @mult_mutex: serializes operations on multiple volumes, like re-naming
357 367 * @dbg_peb_buf: buffer of PEB size used for debugging
358   - * @dbg_buf_mutex: proptects @dbg_peb_buf
  368 + * @dbg_buf_mutex: protects @dbg_peb_buf
359 369 */
360 370 struct ubi_device {
361 371 struct cdev cdev;
362 372  
... ... @@ -392,16 +402,13 @@
392 402 struct rb_root used;
393 403 struct rb_root free;
394 404 struct rb_root scrub;
395   - struct {
396   - struct rb_root pnum;
397   - struct rb_root aec;
398   - } prot;
  405 + struct list_head pq[UBI_PROT_QUEUE_LEN];
  406 + int pq_head;
399 407 spinlock_t wl_lock;
400 408 struct mutex move_mutex;
401 409 struct rw_semaphore work_sem;
402 410 int wl_scheduled;
403 411 struct ubi_wl_entry **lookuptbl;
404   - unsigned long long abs_ec;
405 412 struct ubi_wl_entry *move_from;
406 413 struct ubi_wl_entry *move_to;
407 414 int move_to_put;
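
The new @pq/@pq_head pair is a circular array of list heads: a freshly returned PEB is queued at the slot just behind the head, and the head slot is drained and advanced once per erase, so an entry stays protected for roughly %UBI_PROT_QUEUE_LEN erase cycles. A simplified, user-space model of that structure (plain C, singly linked lists instead of struct list_head, no locking):

    #include <stdio.h>

    #define PROT_QUEUE_LEN 10

    struct entry {
    	int pnum;
    	struct entry *next;          /* minimal stand-in for list_head */
    };

    static struct entry *pq[PROT_QUEUE_LEN];
    static int pq_head;

    static void prot_queue_add(struct entry *e)
    {
    	int tail = pq_head - 1;

    	if (tail < 0)
    		tail = PROT_QUEUE_LEN - 1;
    	e->next = pq[tail];
    	pq[tail] = e;                /* protected for ~PROT_QUEUE_LEN erases */
    }

    static void serve_prot_queue(void)
    {
    	struct entry *e = pq[pq_head];

    	while (e) {                  /* protection over: would move to "used" */
    		printf("PEB %d protection over\n", e->pnum);
    		e = e->next;
    	}
    	pq[pq_head] = NULL;
    	pq_head = (pq_head + 1) % PROT_QUEUE_LEN;
    }
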
drivers/mtd/ubi/wl.c
... ... @@ -22,7 +22,7 @@
22 22 * UBI wear-leveling sub-system.
23 23 *
24 24 * This sub-system is responsible for wear-leveling. It works in terms of
25   - * physical* eraseblocks and erase counters and knows nothing about logical
  25 + * physical eraseblocks and erase counters and knows nothing about logical
26 26 * eraseblocks, volumes, etc. From this sub-system's perspective all physical
27 27 * eraseblocks are of two types - used and free. Used physical eraseblocks are
28 28 * those that were "get" by the 'ubi_wl_get_peb()' function, and free physical
29 29  
... ... @@ -55,9 +55,40 @@
55 55 *
56 56 * As it was said, for the UBI sub-system all physical eraseblocks are either
57 57 * "free" or "used". Free eraseblock are kept in the @wl->free RB-tree, while
58   - * used eraseblocks are kept in a set of different RB-trees: @wl->used,
59   - * @wl->prot.pnum, @wl->prot.aec, and @wl->scrub.
  58 + * used eraseblocks are kept in @wl->used or @wl->scrub RB-trees, or
  59 + * (temporarily) in the @wl->pq queue.
60 60 *
  61 + * When the WL sub-system returns a physical eraseblock, the physical
  62 + * eraseblock is protected from being moved for some "time". For this reason,
  63 + * the physical eraseblock is not directly moved from the @wl->free tree to the
  64 + * @wl->used tree. There is a protection queue in between where this
  65 + * physical eraseblock is temporarily stored (@wl->pq).
  66 + *
  67 + * All this protection stuff is needed because:
  68 + * o we don't want to move physical eraseblocks just after we have given them
  69 + * to the user; instead, we first want to let users fill them up with data;
  70 + *
  71 + * o there is a chance that the user will put the physical eraseblock very
  72 + * soon, so it makes sense not to move it for some time, but wait; this is
  73 + * especially important in case of "short term" physical eraseblocks.
  74 + *
  75 + * Physical eraseblocks stay protected only for limited time. But the "time" is
  76 + * measured in erase cycles in this case. This is implemented with help of the
  77 + * protection queue. Eraseblocks are put to the tail of this queue when they
  78 + * are returned by the 'ubi_wl_get_peb()', and eraseblocks are removed from the
  79 + * head of the queue on each erase operation (for any eraseblock). So the
  80 + * length of the queue defines how may (global) erase cycles PEBs are protected.
  81 + *
  82 + * To put it differently, each physical eraseblock has 2 main states: free and
  83 + * used. The former state corresponds to the @wl->free tree. The latter state
  84 + * is split up on several sub-states:
  85 + * o the WL movement is allowed (@wl->used tree);
  86 + * o the WL movement is temporarily prohibited (@wl->pq queue);
  87 + * o scrubbing is needed (@wl->scrub tree).
  88 + *
  89 + * Depending on the sub-state, wear-leveling entries of the used physical
  90 + * eraseblocks may be kept in one of those structures.
  91 + *
61 92 * Note, in this implementation, we keep a small in-RAM object for each physical
62 93 * eraseblock. This is surely not a scalable solution. But it appears to be good
63 94 * enough for moderately large flashes and it is simple. In future, one may
... ... @@ -70,9 +101,6 @@
70 101 * target PEB, we pick a PEB with the highest EC if our PEB is "old" and we
71 102 * pick target PEB with an average EC if our PEB is not very "old". This is a
72 103 * room for future re-works of the WL sub-system.
73   - *
74   - * Note: the stuff with protection trees looks too complex and is difficult to
75   - * understand. Should be fixed.
76 104 */
77 105  
78 106 #include <linux/slab.h>
... ... @@ -85,14 +113,6 @@
85 113 #define WL_RESERVED_PEBS 1
86 114  
87 115 /*
88   - * How many erase cycles are short term, unknown, and long term physical
89   - * eraseblocks protected.
90   - */
91   -#define ST_PROTECTION 16
92   -#define U_PROTECTION 10
93   -#define LT_PROTECTION 4
94   -
95   -/*
96 116 * Maximum difference between two erase counters. If this threshold is
97 117 * exceeded, the WL sub-system starts moving data from used physical
98 118 * eraseblocks with low erase counter to free physical eraseblocks with high
99 119  
... ... @@ -120,64 +140,9 @@
120 140 #define WL_MAX_FAILURES 32
121 141  
122 142 /**
123   - * struct ubi_wl_prot_entry - PEB protection entry.
124   - * @rb_pnum: link in the @wl->prot.pnum RB-tree
125   - * @rb_aec: link in the @wl->prot.aec RB-tree
126   - * @abs_ec: the absolute erase counter value when the protection ends
127   - * @e: the wear-leveling entry of the physical eraseblock under protection
128   - *
129   - * When the WL sub-system returns a physical eraseblock, the physical
130   - * eraseblock is protected from being moved for some "time". For this reason,
131   - * the physical eraseblock is not directly moved from the @wl->free tree to the
132   - * @wl->used tree. There is one more tree in between where this physical
133   - * eraseblock is temporarily stored (@wl->prot).
134   - *
135   - * All this protection stuff is needed because:
136   - * o we don't want to move physical eraseblocks just after we have given them
137   - * to the user; instead, we first want to let users fill them up with data;
138   - *
139   - * o there is a chance that the user will put the physical eraseblock very
140   - * soon, so it makes sense not to move it for some time, but wait; this is
141   - * especially important in case of "short term" physical eraseblocks.
142   - *
143   - * Physical eraseblocks stay protected only for limited time. But the "time" is
144   - * measured in erase cycles in this case. This is implemented with help of the
145   - * absolute erase counter (@wl->abs_ec). When it reaches certain value, the
146   - * physical eraseblocks are moved from the protection trees (@wl->prot.*) to
147   - * the @wl->used tree.
148   - *
149   - * Protected physical eraseblocks are searched by physical eraseblock number
150   - * (when they are put) and by the absolute erase counter (to check if it is
151   - * time to move them to the @wl->used tree). So there are actually 2 RB-trees
152   - * storing the protected physical eraseblocks: @wl->prot.pnum and
153   - * @wl->prot.aec. They are referred to as the "protection" trees. The
154   - * first one is indexed by the physical eraseblock number. The second one is
155   - * indexed by the absolute erase counter. Both trees store
156   - * &struct ubi_wl_prot_entry objects.
157   - *
158   - * Each physical eraseblock has 2 main states: free and used. The former state
159   - * corresponds to the @wl->free tree. The latter state is split up on several
160   - * sub-states:
161   - * o the WL movement is allowed (@wl->used tree);
162   - * o the WL movement is temporarily prohibited (@wl->prot.pnum and
163   - * @wl->prot.aec trees);
164   - * o scrubbing is needed (@wl->scrub tree).
165   - *
166   - * Depending on the sub-state, wear-leveling entries of the used physical
167   - * eraseblocks may be kept in one of those trees.
168   - */
169   -struct ubi_wl_prot_entry {
170   - struct rb_node rb_pnum;
171   - struct rb_node rb_aec;
172   - unsigned long long abs_ec;
173   - struct ubi_wl_entry *e;
174   -};
175   -
176   -/**
177 143 * struct ubi_work - UBI work description data structure.
178 144 * @list: a link in the list of pending works
179 145 * @func: worker function
180   - * @priv: private data of the worker function
181 146 * @e: physical eraseblock to erase
182 147 * @torture: if the physical eraseblock has to be tortured
183 148 *
184 149  
... ... @@ -198,9 +163,11 @@
198 163 static int paranoid_check_ec(struct ubi_device *ubi, int pnum, int ec);
199 164 static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e,
200 165 struct rb_root *root);
  166 +static int paranoid_check_in_pq(struct ubi_device *ubi, struct ubi_wl_entry *e);
201 167 #else
202 168 #define paranoid_check_ec(ubi, pnum, ec) 0
203 169 #define paranoid_check_in_wl_tree(e, root)
  170 +#define paranoid_check_in_pq(ubi, e) 0
204 171 #endif
205 172  
206 173 /**
... ... @@ -220,7 +187,7 @@
220 187 struct ubi_wl_entry *e1;
221 188  
222 189 parent = *p;
223   - e1 = rb_entry(parent, struct ubi_wl_entry, rb);
  190 + e1 = rb_entry(parent, struct ubi_wl_entry, u.rb);
224 191  
225 192 if (e->ec < e1->ec)
226 193 p = &(*p)->rb_left;
... ... @@ -235,8 +202,8 @@
235 202 }
236 203 }
237 204  
238   - rb_link_node(&e->rb, parent, p);
239   - rb_insert_color(&e->rb, root);
  205 + rb_link_node(&e->u.rb, parent, p);
  206 + rb_insert_color(&e->u.rb, root);
240 207 }
241 208  
242 209 /**
... ... @@ -331,7 +298,7 @@
331 298 while (p) {
332 299 struct ubi_wl_entry *e1;
333 300  
334   - e1 = rb_entry(p, struct ubi_wl_entry, rb);
  301 + e1 = rb_entry(p, struct ubi_wl_entry, u.rb);
335 302  
336 303 if (e->pnum == e1->pnum) {
337 304 ubi_assert(e == e1);
... ... @@ -355,50 +322,24 @@
355 322 }
356 323  
357 324 /**
358   - * prot_tree_add - add physical eraseblock to protection trees.
  325 + * prot_queue_add - add physical eraseblock to the protection queue.
359 326 * @ubi: UBI device description object
360 327 * @e: the physical eraseblock to add
361   - * @pe: protection entry object to use
362   - * @abs_ec: absolute erase counter value when this physical eraseblock has
363   - * to be removed from the protection trees.
364 328 *
365   - * @wl->lock has to be locked.
  329 + * This function adds @e to the tail of the protection queue @ubi->pq, where
  330 + * @e will stay for %UBI_PROT_QUEUE_LEN erase operations and will be
  331 + * temporarily protected from the wear-leveling worker. Note, @wl->lock has to
  332 + * be locked.
366 333 */
367   -static void prot_tree_add(struct ubi_device *ubi, struct ubi_wl_entry *e,
368   - struct ubi_wl_prot_entry *pe, int abs_ec)
  334 +static void prot_queue_add(struct ubi_device *ubi, struct ubi_wl_entry *e)
369 335 {
370   - struct rb_node **p, *parent = NULL;
371   - struct ubi_wl_prot_entry *pe1;
  336 + int pq_tail = ubi->pq_head - 1;
372 337  
373   - pe->e = e;
374   - pe->abs_ec = ubi->abs_ec + abs_ec;
375   -
376   - p = &ubi->prot.pnum.rb_node;
377   - while (*p) {
378   - parent = *p;
379   - pe1 = rb_entry(parent, struct ubi_wl_prot_entry, rb_pnum);
380   -
381   - if (e->pnum < pe1->e->pnum)
382   - p = &(*p)->rb_left;
383   - else
384   - p = &(*p)->rb_right;
385   - }
386   - rb_link_node(&pe->rb_pnum, parent, p);
387   - rb_insert_color(&pe->rb_pnum, &ubi->prot.pnum);
388   -
389   - p = &ubi->prot.aec.rb_node;
390   - parent = NULL;
391   - while (*p) {
392   - parent = *p;
393   - pe1 = rb_entry(parent, struct ubi_wl_prot_entry, rb_aec);
394   -
395   - if (pe->abs_ec < pe1->abs_ec)
396   - p = &(*p)->rb_left;
397   - else
398   - p = &(*p)->rb_right;
399   - }
400   - rb_link_node(&pe->rb_aec, parent, p);
401   - rb_insert_color(&pe->rb_aec, &ubi->prot.aec);
  338 + if (pq_tail < 0)
  339 + pq_tail = UBI_PROT_QUEUE_LEN - 1;
  340 + ubi_assert(pq_tail >= 0 && pq_tail < UBI_PROT_QUEUE_LEN);
  341 + list_add_tail(&e->u.list, &ubi->pq[pq_tail]);
  342 + dbg_wl("added PEB %d EC %d to the protection queue", e->pnum, e->ec);
402 343 }
403 344  
404 345 /**
405 346  
... ... @@ -414,14 +355,14 @@
414 355 struct rb_node *p;
415 356 struct ubi_wl_entry *e;
416 357  
417   - e = rb_entry(rb_first(root), struct ubi_wl_entry, rb);
  358 + e = rb_entry(rb_first(root), struct ubi_wl_entry, u.rb);
418 359 max += e->ec;
419 360  
420 361 p = root->rb_node;
421 362 while (p) {
422 363 struct ubi_wl_entry *e1;
423 364  
424   - e1 = rb_entry(p, struct ubi_wl_entry, rb);
  365 + e1 = rb_entry(p, struct ubi_wl_entry, u.rb);
425 366 if (e1->ec >= max)
426 367 p = p->rb_left;
427 368 else {
... ... @@ -443,17 +384,12 @@
443 384 */
444 385 int ubi_wl_get_peb(struct ubi_device *ubi, int dtype)
445 386 {
446   - int err, protect, medium_ec;
  387 + int err, medium_ec;
447 388 struct ubi_wl_entry *e, *first, *last;
448   - struct ubi_wl_prot_entry *pe;
449 389  
450 390 ubi_assert(dtype == UBI_LONGTERM || dtype == UBI_SHORTTERM ||
451 391 dtype == UBI_UNKNOWN);
452 392  
453   - pe = kmalloc(sizeof(struct ubi_wl_prot_entry), GFP_NOFS);
454   - if (!pe)
455   - return -ENOMEM;
456   -
457 393 retry:
458 394 spin_lock(&ubi->wl_lock);
459 395 if (!ubi->free.rb_node) {
... ... @@ -461,16 +397,13 @@
461 397 ubi_assert(list_empty(&ubi->works));
462 398 ubi_err("no free eraseblocks");
463 399 spin_unlock(&ubi->wl_lock);
464   - kfree(pe);
465 400 return -ENOSPC;
466 401 }
467 402 spin_unlock(&ubi->wl_lock);
468 403  
469 404 err = produce_free_peb(ubi);
470   - if (err < 0) {
471   - kfree(pe);
  405 + if (err < 0)
472 406 return err;
473   - }
474 407 goto retry;
475 408 }
476 409  
... ... @@ -483,7 +416,6 @@
483 416 * %WL_FREE_MAX_DIFF.
484 417 */
485 418 e = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);
486   - protect = LT_PROTECTION;
487 419 break;
488 420 case UBI_UNKNOWN:
489 421 /*
... ... @@ -492,81 +424,63 @@
492 424 * eraseblock with erase counter greater or equivalent than the
493 425 * lowest erase counter plus %WL_FREE_MAX_DIFF.
494 426 */
495   - first = rb_entry(rb_first(&ubi->free), struct ubi_wl_entry, rb);
496   - last = rb_entry(rb_last(&ubi->free), struct ubi_wl_entry, rb);
  427 + first = rb_entry(rb_first(&ubi->free), struct ubi_wl_entry,
  428 + u.rb);
  429 + last = rb_entry(rb_last(&ubi->free), struct ubi_wl_entry, u.rb);
497 430  
498 431 if (last->ec - first->ec < WL_FREE_MAX_DIFF)
499 432 e = rb_entry(ubi->free.rb_node,
500   - struct ubi_wl_entry, rb);
  433 + struct ubi_wl_entry, u.rb);
501 434 else {
502 435 medium_ec = (first->ec + WL_FREE_MAX_DIFF)/2;
503 436 e = find_wl_entry(&ubi->free, medium_ec);
504 437 }
505   - protect = U_PROTECTION;
506 438 break;
507 439 case UBI_SHORTTERM:
508 440 /*
509 441 * For short term data we pick a physical eraseblock with the
510 442 * lowest erase counter as we expect it will be erased soon.
511 443 */
512   - e = rb_entry(rb_first(&ubi->free), struct ubi_wl_entry, rb);
513   - protect = ST_PROTECTION;
  444 + e = rb_entry(rb_first(&ubi->free), struct ubi_wl_entry, u.rb);
514 445 break;
515 446 default:
516   - protect = 0;
517   - e = NULL;
518 447 BUG();
519 448 }
520 449  
  450 + paranoid_check_in_wl_tree(e, &ubi->free);
  451 +
521 452 /*
522   - * Move the physical eraseblock to the protection trees where it will
  453 + * Move the physical eraseblock to the protection queue where it will
523 454 * be protected from being moved for some time.
524 455 */
525   - paranoid_check_in_wl_tree(e, &ubi->free);
526   - rb_erase(&e->rb, &ubi->free);
527   - prot_tree_add(ubi, e, pe, protect);
528   -
529   - dbg_wl("PEB %d EC %d, protection %d", e->pnum, e->ec, protect);
  456 + rb_erase(&e->u.rb, &ubi->free);
  457 + dbg_wl("PEB %d EC %d", e->pnum, e->ec);
  458 + prot_queue_add(ubi, e);
530 459 spin_unlock(&ubi->wl_lock);
531   -
532 460 return e->pnum;
533 461 }
534 462  
535 463 /**
536   - * prot_tree_del - remove a physical eraseblock from the protection trees
  464 + * prot_queue_del - remove a physical eraseblock from the protection queue.
537 465 * @ubi: UBI device description object
538 466 * @pnum: the physical eraseblock to remove
539 467 *
540   - * This function returns PEB @pnum from the protection trees and returns zero
541   - * in case of success and %-ENODEV if the PEB was not found in the protection
542   - * trees.
  468 + * This function deletes PEB @pnum from the protection queue and returns zero
  469 + * in case of success and %-ENODEV if the PEB was not found.
543 470 */
544   -static int prot_tree_del(struct ubi_device *ubi, int pnum)
  471 +static int prot_queue_del(struct ubi_device *ubi, int pnum)
545 472 {
546   - struct rb_node *p;
547   - struct ubi_wl_prot_entry *pe = NULL;
  473 + struct ubi_wl_entry *e;
548 474  
549   - p = ubi->prot.pnum.rb_node;
550   - while (p) {
  475 + e = ubi->lookuptbl[pnum];
  476 + if (!e)
  477 + return -ENODEV;
551 478  
552   - pe = rb_entry(p, struct ubi_wl_prot_entry, rb_pnum);
  479 + if (paranoid_check_in_pq(ubi, e))
  480 + return -ENODEV;
553 481  
554   - if (pnum == pe->e->pnum)
555   - goto found;
556   -
557   - if (pnum < pe->e->pnum)
558   - p = p->rb_left;
559   - else
560   - p = p->rb_right;
561   - }
562   -
563   - return -ENODEV;
564   -
565   -found:
566   - ubi_assert(pe->e->pnum == pnum);
567   - rb_erase(&pe->rb_aec, &ubi->prot.aec);
568   - rb_erase(&pe->rb_pnum, &ubi->prot.pnum);
569   - kfree(pe);
  482 + list_del(&e->u.list);
  483 + dbg_wl("deleted PEB %d from the protection queue", e->pnum);
570 484 return 0;
571 485 }
572 486  
580 494  
... ... @@ -632,47 +546,47 @@
632 546 }
633 547  
634 548 /**
635   - * check_protection_over - check if it is time to stop protecting some PEBs.
  549 + * serve_prot_queue - check if it is time to stop protecting PEBs.
636 550 * @ubi: UBI device description object
637 551 *
638   - * This function is called after each erase operation, when the absolute erase
639   - * counter is incremented, to check if some physical eraseblock have not to be
640   - * protected any longer. These physical eraseblocks are moved from the
641   - * protection trees to the used tree.
  552 + * This function is called after each erase operation and removes PEBs from the
  553 + * tail of the protection queue. These PEBs have been protected for long enough
  554 + * and should be moved to the used tree.
642 555 */
643   -static void check_protection_over(struct ubi_device *ubi)
  556 +static void serve_prot_queue(struct ubi_device *ubi)
644 557 {
645   - struct ubi_wl_prot_entry *pe;
  558 + struct ubi_wl_entry *e, *tmp;
  559 + int count;
646 560  
647 561 /*
648 562 * There may be several protected physical eraseblock to remove,
649 563 * process them all.
650 564 */
651   - while (1) {
652   - spin_lock(&ubi->wl_lock);
653   - if (!ubi->prot.aec.rb_node) {
654   - spin_unlock(&ubi->wl_lock);
655   - break;
656   - }
  565 +repeat:
  566 + count = 0;
  567 + spin_lock(&ubi->wl_lock);
  568 + list_for_each_entry_safe(e, tmp, &ubi->pq[ubi->pq_head], u.list) {
  569 + dbg_wl("PEB %d EC %d protection over, move to used tree",
  570 + e->pnum, e->ec);
657 571  
658   - pe = rb_entry(rb_first(&ubi->prot.aec),
659   - struct ubi_wl_prot_entry, rb_aec);
660   -
661   - if (pe->abs_ec > ubi->abs_ec) {
  572 + list_del(&e->u.list);
  573 + wl_tree_add(e, &ubi->used);
  574 + if (count++ > 32) {
  575 + /*
  576 + * Let's be nice and avoid holding the spinlock for
  577 + * too long.
  578 + */
662 579 spin_unlock(&ubi->wl_lock);
663   - break;
  580 + cond_resched();
  581 + goto repeat;
664 582 }
665   -
666   - dbg_wl("PEB %d protection over, abs_ec %llu, PEB abs_ec %llu",
667   - pe->e->pnum, ubi->abs_ec, pe->abs_ec);
668   - rb_erase(&pe->rb_aec, &ubi->prot.aec);
669   - rb_erase(&pe->rb_pnum, &ubi->prot.pnum);
670   - wl_tree_add(pe->e, &ubi->used);
671   - spin_unlock(&ubi->wl_lock);
672   -
673   - kfree(pe);
674   - cond_resched();
675 583 }
  584 +
  585 + ubi->pq_head += 1;
  586 + if (ubi->pq_head == UBI_PROT_QUEUE_LEN)
  587 + ubi->pq_head = 0;
  588 + ubi_assert(ubi->pq_head >= 0 && ubi->pq_head < UBI_PROT_QUEUE_LEN);
  589 + spin_unlock(&ubi->wl_lock);
676 590 }
677 591  
678 592 /**
... ... @@ -680,8 +594,8 @@
680 594 * @ubi: UBI device description object
681 595 * @wrk: the work to schedule
682 596 *
683   - * This function enqueues a work defined by @wrk to the tail of the pending
684   - * works list.
  597 + * This function adds a work defined by @wrk to the tail of the pending works
  598 + * list.
685 599 */
686 600 static void schedule_ubi_work(struct ubi_device *ubi, struct ubi_work *wrk)
687 601 {
688 602  
... ... @@ -739,13 +653,11 @@
739 653 static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
740 654 int cancel)
741 655 {
742   - int err, put = 0, scrubbing = 0, protect = 0;
743   - struct ubi_wl_prot_entry *uninitialized_var(pe);
  656 + int err, scrubbing = 0, torture = 0;
744 657 struct ubi_wl_entry *e1, *e2;
745 658 struct ubi_vid_hdr *vid_hdr;
746 659  
747 660 kfree(wrk);
748   -
749 661 if (cancel)
750 662 return 0;
751 663  
... ... @@ -781,7 +693,7 @@
781 693 * highly worn-out free physical eraseblock. If the erase
782 694 * counters differ much enough, start wear-leveling.
783 695 */
784   - e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, rb);
  696 + e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb);
785 697 e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);
786 698  
787 699 if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) {
788 700  
789 701  
790 702  
... ... @@ -790,21 +702,21 @@
790 702 goto out_cancel;
791 703 }
792 704 paranoid_check_in_wl_tree(e1, &ubi->used);
793   - rb_erase(&e1->rb, &ubi->used);
  705 + rb_erase(&e1->u.rb, &ubi->used);
794 706 dbg_wl("move PEB %d EC %d to PEB %d EC %d",
795 707 e1->pnum, e1->ec, e2->pnum, e2->ec);
796 708 } else {
797 709 /* Perform scrubbing */
798 710 scrubbing = 1;
799   - e1 = rb_entry(rb_first(&ubi->scrub), struct ubi_wl_entry, rb);
  711 + e1 = rb_entry(rb_first(&ubi->scrub), struct ubi_wl_entry, u.rb);
800 712 e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);
801 713 paranoid_check_in_wl_tree(e1, &ubi->scrub);
802   - rb_erase(&e1->rb, &ubi->scrub);
  714 + rb_erase(&e1->u.rb, &ubi->scrub);
803 715 dbg_wl("scrub PEB %d to PEB %d", e1->pnum, e2->pnum);
804 716 }
805 717  
806 718 paranoid_check_in_wl_tree(e2, &ubi->free);
807   - rb_erase(&e2->rb, &ubi->free);
  719 + rb_erase(&e2->u.rb, &ubi->free);
808 720 ubi->move_from = e1;
809 721 ubi->move_to = e2;
810 722 spin_unlock(&ubi->wl_lock);
... ... @@ -844,46 +756,67 @@
844 756  
845 757 err = ubi_eba_copy_leb(ubi, e1->pnum, e2->pnum, vid_hdr);
846 758 if (err) {
847   -
  759 + if (err == -EAGAIN)
  760 + goto out_not_moved;
848 761 if (err < 0)
849 762 goto out_error;
850   - if (err == 1)
  763 + if (err == 2) {
  764 + /* Target PEB write error, torture it */
  765 + torture = 1;
851 766 goto out_not_moved;
  767 + }
852 768  
853 769 /*
854   - * For some reason the LEB was not moved - it might be because
855   - * the volume is being deleted. We should prevent this PEB from
856   - * being selected for wear-levelling movement for some "time",
857   - * so put it to the protection tree.
  770 + * The LEB has not been moved because the volume is being
  771 + * deleted or the PEB has been put meanwhile. We should prevent
  772 + * this PEB from being selected for wear-leveling movement
  773 + * again, so put it to the protection queue.
858 774 */
859 775  
860   - dbg_wl("cancelled moving PEB %d", e1->pnum);
861   - pe = kmalloc(sizeof(struct ubi_wl_prot_entry), GFP_NOFS);
862   - if (!pe) {
863   - err = -ENOMEM;
864   - goto out_error;
865   - }
  776 + dbg_wl("canceled moving PEB %d", e1->pnum);
  777 + ubi_assert(err == 1);
866 778  
867   - protect = 1;
  779 + ubi_free_vid_hdr(ubi, vid_hdr);
  780 + vid_hdr = NULL;
  781 +
  782 + spin_lock(&ubi->wl_lock);
  783 + prot_queue_add(ubi, e1);
  784 + ubi_assert(!ubi->move_to_put);
  785 + ubi->move_from = ubi->move_to = NULL;
  786 + ubi->wl_scheduled = 0;
  787 + spin_unlock(&ubi->wl_lock);
  788 +
  789 + e1 = NULL;
  790 + err = schedule_erase(ubi, e2, 0);
  791 + if (err)
  792 + goto out_error;
  793 + mutex_unlock(&ubi->move_mutex);
  794 + return 0;
868 795 }
869 796  
  797 + /* The PEB has been successfully moved */
870 798 ubi_free_vid_hdr(ubi, vid_hdr);
871   - if (scrubbing && !protect)
  799 + vid_hdr = NULL;
  800 + if (scrubbing)
872 801 ubi_msg("scrubbed PEB %d, data moved to PEB %d",
873 802 e1->pnum, e2->pnum);
874 803  
875 804 spin_lock(&ubi->wl_lock);
876   - if (protect)
877   - prot_tree_add(ubi, e1, pe, protect);
878   - if (!ubi->move_to_put)
  805 + if (!ubi->move_to_put) {
879 806 wl_tree_add(e2, &ubi->used);
880   - else
881   - put = 1;
  807 + e2 = NULL;
  808 + }
882 809 ubi->move_from = ubi->move_to = NULL;
883 810 ubi->move_to_put = ubi->wl_scheduled = 0;
884 811 spin_unlock(&ubi->wl_lock);
885 812  
886   - if (put) {
  813 + err = schedule_erase(ubi, e1, 0);
  814 + if (err) {
  815 + e1 = NULL;
  816 + goto out_error;
  817 + }
  818 +
  819 + if (e2) {
887 820 /*
888 821 * Well, the target PEB was put meanwhile, schedule it for
889 822 * erasure.
... ... @@ -894,13 +827,6 @@
894 827 goto out_error;
895 828 }
896 829  
897   - if (!protect) {
898   - err = schedule_erase(ubi, e1, 0);
899   - if (err)
900   - goto out_error;
901   - }
902   -
903   -
904 830 dbg_wl("done");
905 831 mutex_unlock(&ubi->move_mutex);
906 832 return 0;
... ... @@ -908,20 +834,24 @@
908 834 /*
909 835 * For some reasons the LEB was not moved, might be an error, might be
910 836 * something else. @e1 was not changed, so return it back. @e2 might
911   - * be changed, schedule it for erasure.
  837 + * have been changed, schedule it for erasure.
912 838 */
913 839 out_not_moved:
  840 + dbg_wl("canceled moving PEB %d", e1->pnum);
914 841 ubi_free_vid_hdr(ubi, vid_hdr);
  842 + vid_hdr = NULL;
915 843 spin_lock(&ubi->wl_lock);
916 844 if (scrubbing)
917 845 wl_tree_add(e1, &ubi->scrub);
918 846 else
919 847 wl_tree_add(e1, &ubi->used);
  848 + ubi_assert(!ubi->move_to_put);
920 849 ubi->move_from = ubi->move_to = NULL;
921   - ubi->move_to_put = ubi->wl_scheduled = 0;
  850 + ubi->wl_scheduled = 0;
922 851 spin_unlock(&ubi->wl_lock);
923 852  
924   - err = schedule_erase(ubi, e2, 0);
  853 + e1 = NULL;
  854 + err = schedule_erase(ubi, e2, torture);
925 855 if (err)
926 856 goto out_error;
927 857  
... ... @@ -938,8 +868,10 @@
938 868 ubi->move_to_put = ubi->wl_scheduled = 0;
939 869 spin_unlock(&ubi->wl_lock);
940 870  
941   - kmem_cache_free(ubi_wl_entry_slab, e1);
942   - kmem_cache_free(ubi_wl_entry_slab, e2);
  871 + if (e1)
  872 + kmem_cache_free(ubi_wl_entry_slab, e1);
  873 + if (e2)
  874 + kmem_cache_free(ubi_wl_entry_slab, e2);
943 875 ubi_ro_mode(ubi);
944 876  
945 877 mutex_unlock(&ubi->move_mutex);
... ... @@ -988,7 +920,7 @@
988 920 * erase counter of free physical eraseblocks is greater then
989 921 * %UBI_WL_THRESHOLD.
990 922 */
991   - e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, rb);
  923 + e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb);
992 924 e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);
993 925  
994 926 if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD))
... ... @@ -1050,7 +982,6 @@
1050 982 kfree(wl_wrk);
1051 983  
1052 984 spin_lock(&ubi->wl_lock);
1053   - ubi->abs_ec += 1;
1054 985 wl_tree_add(e, &ubi->free);
1055 986 spin_unlock(&ubi->wl_lock);
1056 987  
... ... @@ -1058,7 +989,7 @@
1058 989 * One more erase operation has happened, take care about
1059 990 * protected physical eraseblocks.
1060 991 */
1061   - check_protection_over(ubi);
  992 + serve_prot_queue(ubi);
1062 993  
1063 994 /* And take care about wear-leveling */
1064 995 err = ensure_wear_leveling(ubi);
1065 996  
1066 997  
... ... @@ -1190,12 +1121,12 @@
1190 1121 } else {
1191 1122 if (in_wl_tree(e, &ubi->used)) {
1192 1123 paranoid_check_in_wl_tree(e, &ubi->used);
1193   - rb_erase(&e->rb, &ubi->used);
  1124 + rb_erase(&e->u.rb, &ubi->used);
1194 1125 } else if (in_wl_tree(e, &ubi->scrub)) {
1195 1126 paranoid_check_in_wl_tree(e, &ubi->scrub);
1196   - rb_erase(&e->rb, &ubi->scrub);
  1127 + rb_erase(&e->u.rb, &ubi->scrub);
1197 1128 } else {
1198   - err = prot_tree_del(ubi, e->pnum);
  1129 + err = prot_queue_del(ubi, e->pnum);
1199 1130 if (err) {
1200 1131 ubi_err("PEB %d not found", pnum);
1201 1132 ubi_ro_mode(ubi);
1202 1133  
... ... @@ -1255,11 +1186,11 @@
1255 1186  
1256 1187 if (in_wl_tree(e, &ubi->used)) {
1257 1188 paranoid_check_in_wl_tree(e, &ubi->used);
1258   - rb_erase(&e->rb, &ubi->used);
  1189 + rb_erase(&e->u.rb, &ubi->used);
1259 1190 } else {
1260 1191 int err;
1261 1192  
1262   - err = prot_tree_del(ubi, e->pnum);
  1193 + err = prot_queue_del(ubi, e->pnum);
1263 1194 if (err) {
1264 1195 ubi_err("PEB %d not found", pnum);
1265 1196 ubi_ro_mode(ubi);
... ... @@ -1290,7 +1221,7 @@
1290 1221 int err;
1291 1222  
1292 1223 /*
1293   - * Erase while the pending works queue is not empty, but not more then
  1224 + * Erase while the pending works queue is not empty, but not more than
1294 1225 * the number of currently pending works.
1295 1226 */
1296 1227 dbg_wl("flush (%d pending works)", ubi->works_count);
... ... @@ -1308,7 +1239,7 @@
1308 1239 up_write(&ubi->work_sem);
1309 1240  
1310 1241 /*
1311   - * And in case last was the WL worker and it cancelled the LEB
  1242 + * And in case last was the WL worker and it canceled the LEB
1312 1243 * movement, flush again.
1313 1244 */
1314 1245 while (ubi->works_count) {
1315 1246  
... ... @@ -1337,11 +1268,11 @@
1337 1268 else if (rb->rb_right)
1338 1269 rb = rb->rb_right;
1339 1270 else {
1340   - e = rb_entry(rb, struct ubi_wl_entry, rb);
  1271 + e = rb_entry(rb, struct ubi_wl_entry, u.rb);
1341 1272  
1342 1273 rb = rb_parent(rb);
1343 1274 if (rb) {
1344   - if (rb->rb_left == &e->rb)
  1275 + if (rb->rb_left == &e->u.rb)
1345 1276 rb->rb_left = NULL;
1346 1277 else
1347 1278 rb->rb_right = NULL;
1348 1279  
1349 1280  
... ... @@ -1436,15 +1367,13 @@
1436 1367 */
1437 1368 int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si)
1438 1369 {
1439   - int err;
  1370 + int err, i;
1440 1371 struct rb_node *rb1, *rb2;
1441 1372 struct ubi_scan_volume *sv;
1442 1373 struct ubi_scan_leb *seb, *tmp;
1443 1374 struct ubi_wl_entry *e;
1444 1375  
1445   -
1446 1376 ubi->used = ubi->free = ubi->scrub = RB_ROOT;
1447   - ubi->prot.pnum = ubi->prot.aec = RB_ROOT;
1448 1377 spin_lock_init(&ubi->wl_lock);
1449 1378 mutex_init(&ubi->move_mutex);
1450 1379 init_rwsem(&ubi->work_sem);
... ... @@ -1458,6 +1387,10 @@
1458 1387 if (!ubi->lookuptbl)
1459 1388 return err;
1460 1389  
  1390 + for (i = 0; i < UBI_PROT_QUEUE_LEN; i++)
  1391 + INIT_LIST_HEAD(&ubi->pq[i]);
  1392 + ubi->pq_head = 0;
  1393 +
1461 1394 list_for_each_entry_safe(seb, tmp, &si->erase, u.list) {
1462 1395 cond_resched();
1463 1396  
1464 1397  
1465 1398  
1466 1399  
... ... @@ -1552,33 +1485,18 @@
1552 1485 }
1553 1486  
1554 1487 /**
1555   - * protection_trees_destroy - destroy the protection RB-trees.
  1488 + * protection_queue_destroy - destroy the protection queue.
1556 1489 * @ubi: UBI device description object
1557 1490 */
1558   -static void protection_trees_destroy(struct ubi_device *ubi)
  1491 +static void protection_queue_destroy(struct ubi_device *ubi)
1559 1492 {
1560   - struct rb_node *rb;
1561   - struct ubi_wl_prot_entry *pe;
  1493 + int i;
  1494 + struct ubi_wl_entry *e, *tmp;
1562 1495  
1563   - rb = ubi->prot.aec.rb_node;
1564   - while (rb) {
1565   - if (rb->rb_left)
1566   - rb = rb->rb_left;
1567   - else if (rb->rb_right)
1568   - rb = rb->rb_right;
1569   - else {
1570   - pe = rb_entry(rb, struct ubi_wl_prot_entry, rb_aec);
1571   -
1572   - rb = rb_parent(rb);
1573   - if (rb) {
1574   - if (rb->rb_left == &pe->rb_aec)
1575   - rb->rb_left = NULL;
1576   - else
1577   - rb->rb_right = NULL;
1578   - }
1579   -
1580   - kmem_cache_free(ubi_wl_entry_slab, pe->e);
1581   - kfree(pe);
  1496 + for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i) {
  1497 + list_for_each_entry_safe(e, tmp, &ubi->pq[i], u.list) {
  1498 + list_del(&e->u.list);
  1499 + kmem_cache_free(ubi_wl_entry_slab, e);
1582 1500 }
1583 1501 }
1584 1502 }
... ... @@ -1591,7 +1509,7 @@
1591 1509 {
1592 1510 dbg_wl("close the WL sub-system");
1593 1511 cancel_pending(ubi);
1594   - protection_trees_destroy(ubi);
  1512 + protection_queue_destroy(ubi);
1595 1513 tree_destroy(&ubi->used);
1596 1514 tree_destroy(&ubi->free);
1597 1515 tree_destroy(&ubi->scrub);
... ... @@ -1661,5 +1579,28 @@
1661 1579 return 1;
1662 1580 }
1663 1581  
  1582 +/**
  1583 + * paranoid_check_in_pq - check if wear-leveling entry is in the protection
  1584 + * queue.
  1585 + * @ubi: UBI device description object
  1586 + * @e: the wear-leveling entry to check
  1587 + *
  1588 + * This function returns zero if @e is in @ubi->pq and %1 if it is not.
  1589 + */
  1590 +static int paranoid_check_in_pq(struct ubi_device *ubi, struct ubi_wl_entry *e)
  1591 +{
  1592 + struct ubi_wl_entry *p;
  1593 + int i;
  1594 +
  1595 + for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i)
  1596 + list_for_each_entry(p, &ubi->pq[i], u.list)
  1597 + if (p == e)
  1598 + return 0;
  1599 +
  1600 + ubi_err("paranoid check failed for PEB %d, EC %d, Protect queue",
  1601 + e->pnum, e->ec);
  1602 + ubi_dbg_dump_stack();
  1603 + return 1;
  1604 +}
1664 1605 #endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */
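
One detail worth noting in 'serve_prot_queue()' above is the batching: after roughly 32 entries it drops the spinlock, calls cond_resched(), and starts over, so the lock is never held for long. A generic sketch of that pattern with hypothetical names (the real code moves each entry to the used RB-tree instead of calling a helper):

    #include <linux/list.h>
    #include <linux/sched.h>
    #include <linux/spinlock.h>

    #define BATCH 32

    struct item {
    	struct list_head list;
    };

    static void process_item(struct item *e);	/* hypothetical per-entry work */

    static void drain_queue(spinlock_t *lock, struct list_head *queue)
    {
    	struct item *e, *tmp;
    	int count;

    repeat:
    	count = 0;
    	spin_lock(lock);
    	list_for_each_entry_safe(e, tmp, queue, list) {
    		list_del(&e->list);
    		process_item(e);
    		if (count++ > BATCH) {
    			/* Keep the spinlock hold time bounded: drop it,
    			 * give the scheduler a chance, then start over. */
    			spin_unlock(lock);
    			cond_resched();
    			goto repeat;
    		}
    	}
    	spin_unlock(lock);
    }
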