Commit 574c3fdae3890e60f8bc59e8107686944ba1e446
Exists in master and in 4 other branches
Merge branch 'linux-next' of git://git.infradead.org/~dedekind/ubi-2.6
* 'linux-next' of git://git.infradead.org/~dedekind/ubi-2.6:
  UBI: fix checkpatch.pl warnings
  UBI: simplify PEB protection code
  UBI: prepare for protection tree improvements
  UBI: return -ENOMEM upon failing vmalloc
  UBI: document UBI ioctls
  UBI: handle write errors in WL worker
  UBI: fix error path
  UBI: some code re-structuring
  UBI: fix deadlock
  UBI: fix warnings when debugging is enabled
Showing 8 changed files
Documentation/ioctl/ioctl-number.txt
... | ... | @@ -97,6 +97,7 @@ |
97 | 97 | <http://linux01.gwdg.de/~alatham/ppdd.html> |
98 | 98 | 'M' all linux/soundcard.h |
99 | 99 | 'N' 00-1F drivers/usb/scanner.h |
100 | +'O' 00-02 include/mtd/ubi-user.h UBI | |
100 | 101 | 'P' all linux/soundcard.h |
101 | 102 | 'Q' all linux/soundcard.h |
102 | 103 | 'R' 00-1F linux/random.h |
... | ... | @@ -142,6 +143,9 @@ |
142 | 143 | 'n' 00-7F linux/ncp_fs.h |
143 | 144 | 'n' E0-FF video/matrox.h matroxfb |
144 | 145 | 'o' 00-1F fs/ocfs2/ocfs2_fs.h OCFS2 |
146 | +'o' 00-03 include/mtd/ubi-user.h conflict! (OCFS2 and UBI overlap) | |
147 | +'o' 40-41 include/mtd/ubi-user.h UBI | |
148 | +'o' 01-A1 include/linux/dvb/*.h DVB | |
145 | 149 | 'p' 00-0F linux/phantom.h conflict! (OpenHaptics needs this) |
146 | 150 | 'p' 00-3F linux/mc146818rtc.h conflict! |
147 | 151 | 'p' 40-7F linux/nvram.h |
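The three ranges registered above correspond to UBI's three kinds of character devices. A condensed sketch of how include/mtd/ubi-user.h of this era carves them up (paraphrased; the exact request names and argument types are illustrative and should be checked against the header itself):

#include <linux/ioctl.h>

/* UBI device ioctls: 'o' 0x00-0x03, the range that collides with
 * OCFS2, hence the "conflict!" note in the table above. */
#define UBI_IOC_MAGIC 'o'
#define UBI_IOC_MKVOL _IOW(UBI_IOC_MAGIC, 0, struct ubi_mkvol_req)
#define UBI_IOC_RMVOL _IOW(UBI_IOC_MAGIC, 1, int32_t)

/* UBI control device ioctls: 'o' 0x40-0x41 (64 and 65 decimal). */
#define UBI_CTRL_IOC_MAGIC 'o'
#define UBI_IOC_ATT _IOW(UBI_CTRL_IOC_MAGIC, 64, struct ubi_attach_req)
#define UBI_IOC_DET _IOW(UBI_CTRL_IOC_MAGIC, 65, int32_t)

/* UBI volume ioctls: 'O' 0x00-0x02. */
#define UBI_VOL_IOC_MAGIC 'O'
#define UBI_IOC_VOLUP _IOW(UBI_VOL_IOC_MAGIC, 0, int64_t)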
drivers/mtd/ubi/build.c
... | ... | @@ -815,19 +815,20 @@ |
815 | 815 | if (err) |
816 | 816 | goto out_free; |
817 | 817 | |
818 | + err = -ENOMEM; | |
818 | 819 | ubi->peb_buf1 = vmalloc(ubi->peb_size); |
819 | 820 | if (!ubi->peb_buf1) |
820 | 821 | goto out_free; |
821 | 822 | |
822 | 823 | ubi->peb_buf2 = vmalloc(ubi->peb_size); |
823 | 824 | if (!ubi->peb_buf2) |
824 | - goto out_free; | |
825 | + goto out_free; | |
825 | 826 | |
826 | 827 | #ifdef CONFIG_MTD_UBI_DEBUG |
827 | 828 | mutex_init(&ubi->dbg_buf_mutex); |
828 | 829 | ubi->dbg_peb_buf = vmalloc(ubi->peb_size); |
829 | 830 | if (!ubi->dbg_peb_buf) |
830 | - goto out_free; | |
831 | + goto out_free; | |
831 | 832 | #endif |
832 | 833 | |
833 | 834 | err = attach_by_scanning(ubi); |
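The one functional change in this hunk is the "err = -ENOMEM;" preset: previously a failed vmalloc() jumped to out_free with a stale error code, so presetting it once keeps every failure branch a bare goto. A minimal standalone illustration of the idiom (hypothetical names, not UBI code):

#include <errno.h>
#include <stdlib.h>

/* Hypothetical two-buffer setup mirroring the build.c pattern. */
static int setup_bufs(char **a, char **b, size_t size)
{
        int err;

        err = -ENOMEM;          /* preset once... */
        *a = malloc(size);
        if (!*a)
                goto out_free;  /* ...so each branch is a bare goto */

        *b = malloc(size);
        if (!*b)
                goto out_free;

        return 0;

out_free:
        free(*a);               /* free(NULL) is a no-op */
        *a = NULL;
        return err;
}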
drivers/mtd/ubi/cdev.c
... | ... | @@ -721,7 +721,8 @@ |
721 | 721 | * It seems we need to remove volume with name @re->new_name, |
722 | 722 | * if it exists. |
723 | 723 | */ |
724 | - desc = ubi_open_volume_nm(ubi->ubi_num, re->new_name, UBI_EXCLUSIVE); | |
724 | + desc = ubi_open_volume_nm(ubi->ubi_num, re->new_name, | |
725 | + UBI_EXCLUSIVE); | |
725 | 726 | if (IS_ERR(desc)) { |
726 | 727 | err = PTR_ERR(desc); |
727 | 728 | if (err == -ENODEV) |
drivers/mtd/ubi/debug.h
... | ... | @@ -27,11 +27,11 @@ |
27 | 27 | #define dbg_err(fmt, ...) ubi_err(fmt, ##__VA_ARGS__) |
28 | 28 | |
29 | 29 | #define ubi_assert(expr) do { \ |
30 | - if (unlikely(!(expr))) { \ | |
31 | - printk(KERN_CRIT "UBI assert failed in %s at %u (pid %d)\n", \ | |
32 | - __func__, __LINE__, current->pid); \ | |
33 | - ubi_dbg_dump_stack(); \ | |
34 | - } \ | |
30 | + if (unlikely(!(expr))) { \ | |
31 | + printk(KERN_CRIT "UBI assert failed in %s at %u (pid %d)\n", \ | |
32 | + __func__, __LINE__, current->pid); \ | |
33 | + ubi_dbg_dump_stack(); \ | |
34 | + } \ | |
35 | 35 | } while (0) |
36 | 36 | |
37 | 37 | #define dbg_msg(fmt, ...) \ |
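The hunk above only re-indents the macro body; the do { ... } while (0) wrapper it preserves is what keeps a multi-statement macro safe in every statement context. A standalone illustration of why the wrapper matters (names hypothetical):

#include <stdio.h>

/* Unsafe: expands to a bare if-statement. */
#define BAD_ASSERT(expr) \
        if (!(expr)) printf("assert failed\n")

/* Safe, the way ubi_assert() does it: the expansion is always a
 * single statement, so if/else pairing in the caller is preserved. */
#define GOOD_ASSERT(expr) do { \
        if (!(expr)) \
                printf("assert failed\n"); \
} while (0)

int main(void)
{
        int cond = 0;

        if (cond)
                GOOD_ASSERT(cond); /* swapping in BAD_ASSERT here
                                      breaks the if/else pairing */
        else
                printf("cond was false\n");
        return 0;
}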
drivers/mtd/ubi/eba.c
... | ... | @@ -504,12 +504,9 @@ |
504 | 504 | if (!vid_hdr) |
505 | 505 | return -ENOMEM; |
506 | 506 | |
507 | - mutex_lock(&ubi->buf_mutex); | |
508 | - | |
509 | 507 | retry: |
510 | 508 | new_pnum = ubi_wl_get_peb(ubi, UBI_UNKNOWN); |
511 | 509 | if (new_pnum < 0) { |
512 | - mutex_unlock(&ubi->buf_mutex); | |
513 | 510 | ubi_free_vid_hdr(ubi, vid_hdr); |
514 | 511 | return new_pnum; |
515 | 512 | } |
516 | 513 | |
517 | 514 | |
518 | 515 | |
... | ... | @@ -529,20 +526,23 @@ |
529 | 526 | goto write_error; |
530 | 527 | |
531 | 528 | data_size = offset + len; |
529 | + mutex_lock(&ubi->buf_mutex); | |
532 | 530 | memset(ubi->peb_buf1 + offset, 0xFF, len); |
533 | 531 | |
534 | 532 | /* Read everything before the area where the write failure happened */ |
535 | 533 | if (offset > 0) { |
536 | 534 | err = ubi_io_read_data(ubi, ubi->peb_buf1, pnum, 0, offset); |
537 | 535 | if (err && err != UBI_IO_BITFLIPS) |
538 | - goto out_put; | |
536 | + goto out_unlock; | |
539 | 537 | } |
540 | 538 | |
541 | 539 | memcpy(ubi->peb_buf1 + offset, buf, len); |
542 | 540 | |
543 | 541 | err = ubi_io_write_data(ubi, ubi->peb_buf1, new_pnum, 0, data_size); |
544 | - if (err) | |
542 | + if (err) { | |
543 | + mutex_unlock(&ubi->buf_mutex); | |
545 | 544 | goto write_error; |
545 | + } | |
546 | 546 | |
547 | 547 | mutex_unlock(&ubi->buf_mutex); |
548 | 548 | ubi_free_vid_hdr(ubi, vid_hdr); |
549 | 549 | |
... | ... | @@ -553,8 +553,9 @@ |
553 | 553 | ubi_msg("data was successfully recovered"); |
554 | 554 | return 0; |
555 | 555 | |
556 | -out_put: | |
556 | +out_unlock: | |
557 | 557 | mutex_unlock(&ubi->buf_mutex); |
558 | +out_put: | |
558 | 559 | ubi_wl_put_peb(ubi, new_pnum, 1); |
559 | 560 | ubi_free_vid_hdr(ubi, vid_hdr); |
560 | 561 | return err; |
... | ... | @@ -567,7 +568,6 @@ |
567 | 568 | ubi_warn("failed to write to PEB %d", new_pnum); |
568 | 569 | ubi_wl_put_peb(ubi, new_pnum, 1); |
569 | 570 | if (++tries > UBI_IO_RETRIES) { |
570 | - mutex_unlock(&ubi->buf_mutex); | |
571 | 571 | ubi_free_vid_hdr(ubi, vid_hdr); |
572 | 572 | return err; |
573 | 573 | } |
... | ... | @@ -949,10 +949,14 @@ |
949 | 949 | * This function copies logical eraseblock from physical eraseblock @from to |
950 | 950 | * physical eraseblock @to. The @vid_hdr buffer may be changed by this |
951 | 951 | * function. Returns: |
952 | - * o %0 in case of success; | |
953 | - * o %1 if the operation was canceled and should be tried later (e.g., | |
954 | - * because a bit-flip was detected at the target PEB); | |
955 | - * o %2 if the volume is being deleted and this LEB should not be moved. | |
952 | + * o %0 in case of success; | |
953 | + * o %1 if the operation was canceled because the volume is being deleted | |
954 | + * or because the PEB was put meanwhile; | |
955 | + * o %2 if the operation was canceled because there was a write error to the | |
956 | + * target PEB; | |
957 | + * o %-EAGAIN if the operation was canceled because a bit-flip was detected | |
958 | + * in the target PEB; | |
959 | + * o a negative error code in case of failure. | |
956 | 960 | */ |
957 | 961 | int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, |
958 | 962 | struct ubi_vid_hdr *vid_hdr) |
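The reworked contract separates retryable, canceled, and target-write-error outcomes so the caller can react to each one differently. A hedged sketch of the caller-side dispatch (the real version is the wear_leveling_worker() hunk in wl.c further below; the enum and function names here are illustrative):

#include <errno.h>

enum copy_outcome { COPIED, RETRY_LATER, CANCELED, TARGET_BAD, FAILED };

/* Map ubi_eba_copy_leb()'s return value onto the caller's actions. */
static enum copy_outcome classify_copy(int err)
{
        if (err == 0)
                return COPIED;      /* data moved, erase the source */
        if (err == -EAGAIN)
                return RETRY_LATER; /* bit-flip in the target PEB */
        if (err == 1)
                return CANCELED;    /* volume deleted / PEB was put */
        if (err == 2)
                return TARGET_BAD;  /* write error: torture the target */
        return FAILED;              /* any other negative error code */
}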
... | ... | @@ -978,7 +982,7 @@ |
978 | 982 | /* |
979 | 983 | * Note, we may race with volume deletion, which means that the volume |
980 | 984 | * this logical eraseblock belongs to might be being deleted. Since the |
981 | - * volume deletion unmaps all the volume's logical eraseblocks, it will | |
985 | + * volume deletion un-maps all the volume's logical eraseblocks, it will | |
982 | 986 | * be locked in 'ubi_wl_put_peb()' and wait for the WL worker to finish. |
983 | 987 | */ |
984 | 988 | vol = ubi->volumes[idx]; |
... | ... | @@ -986,7 +990,7 @@ |
986 | 990 | /* No need to do further work, cancel */ |
987 | 991 | dbg_eba("volume %d is being removed, cancel", vol_id); |
988 | 992 | spin_unlock(&ubi->volumes_lock); |
989 | - return 2; | |
993 | + return 1; | |
990 | 994 | } |
991 | 995 | spin_unlock(&ubi->volumes_lock); |
992 | 996 | |
... | ... | @@ -1023,7 +1027,7 @@ |
1023 | 1027 | |
1024 | 1028 | /* |
1025 | 1029 | * OK, now the LEB is locked and we can safely start moving it. Since |
1026 | - * this function utilizes thie @ubi->peb1_buf buffer which is shared | |
1030 | + * this function utilizes the @ubi->peb1_buf buffer which is shared | |
1027 | 1031 | * with some other functions, so lock the buffer by taking the |
1028 | 1032 | * @ubi->buf_mutex. |
1029 | 1033 | */ |
1030 | 1034 | |
... | ... | @@ -1068,8 +1072,11 @@ |
1068 | 1072 | vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); |
1069 | 1073 | |
1070 | 1074 | err = ubi_io_write_vid_hdr(ubi, to, vid_hdr); |
1071 | - if (err) | |
1075 | + if (err) { | |
1076 | + if (err == -EIO) | |
1077 | + err = 2; | |
1072 | 1078 | goto out_unlock_buf; |
1079 | + } | |
1073 | 1080 | |
1074 | 1081 | cond_resched(); |
1075 | 1082 | |
... | ... | @@ -1079,14 +1086,17 @@ |
1079 | 1086 | if (err != UBI_IO_BITFLIPS) |
1080 | 1087 | ubi_warn("cannot read VID header back from PEB %d", to); |
1081 | 1088 | else |
1082 | - err = 1; | |
1089 | + err = -EAGAIN; | |
1083 | 1090 | goto out_unlock_buf; |
1084 | 1091 | } |
1085 | 1092 | |
1086 | 1093 | if (data_size > 0) { |
1087 | 1094 | err = ubi_io_write_data(ubi, ubi->peb_buf1, to, 0, aldata_size); |
1088 | - if (err) | |
1095 | + if (err) { | |
1096 | + if (err == -EIO) | |
1097 | + err = 2; | |
1089 | 1098 | goto out_unlock_buf; |
1099 | + } | |
1090 | 1100 | |
1091 | 1101 | cond_resched(); |
1092 | 1102 | |
... | ... | @@ -1101,15 +1111,16 @@ |
1101 | 1111 | ubi_warn("cannot read data back from PEB %d", |
1102 | 1112 | to); |
1103 | 1113 | else |
1104 | - err = 1; | |
1114 | + err = -EAGAIN; | |
1105 | 1115 | goto out_unlock_buf; |
1106 | 1116 | } |
1107 | 1117 | |
1108 | 1118 | cond_resched(); |
1109 | 1119 | |
1110 | 1120 | if (memcmp(ubi->peb_buf1, ubi->peb_buf2, aldata_size)) { |
1111 | - ubi_warn("read data back from PEB %d - it is different", | |
1112 | - to); | |
1121 | + ubi_warn("read data back from PEB %d and it is " | |
1122 | + "different", to); | |
1123 | + err = -EINVAL; | |
1113 | 1124 | goto out_unlock_buf; |
1114 | 1125 | } |
1115 | 1126 | } |
drivers/mtd/ubi/io.c
... | ... | @@ -637,8 +637,6 @@ |
637 | 637 | |
638 | 638 | dbg_io("read EC header from PEB %d", pnum); |
639 | 639 | ubi_assert(pnum >= 0 && pnum < ubi->peb_count); |
640 | - if (UBI_IO_DEBUG) | |
641 | - verbose = 1; | |
642 | 640 | |
643 | 641 | err = ubi_io_read(ubi, ec_hdr, pnum, 0, UBI_EC_HDR_SIZE); |
644 | 642 | if (err) { |
... | ... | @@ -685,6 +683,9 @@ |
685 | 683 | if (verbose) |
686 | 684 | ubi_warn("no EC header found at PEB %d, " |
687 | 685 | "only 0xFF bytes", pnum); |
686 | + else if (UBI_IO_DEBUG) | |
687 | + dbg_msg("no EC header found at PEB %d, " | |
688 | + "only 0xFF bytes", pnum); | |
688 | 689 | return UBI_IO_PEB_EMPTY; |
689 | 690 | } |
690 | 691 | |
... | ... | @@ -696,7 +697,9 @@ |
696 | 697 | ubi_warn("bad magic number at PEB %d: %08x instead of " |
697 | 698 | "%08x", pnum, magic, UBI_EC_HDR_MAGIC); |
698 | 699 | ubi_dbg_dump_ec_hdr(ec_hdr); |
699 | - } | |
700 | + } else if (UBI_IO_DEBUG) | |
701 | + dbg_msg("bad magic number at PEB %d: %08x instead of " | |
702 | + "%08x", pnum, magic, UBI_EC_HDR_MAGIC); | |
700 | 703 | return UBI_IO_BAD_EC_HDR; |
701 | 704 | } |
702 | 705 | |
... | ... | @@ -708,7 +711,9 @@ |
708 | 711 | ubi_warn("bad EC header CRC at PEB %d, calculated " |
709 | 712 | "%#08x, read %#08x", pnum, crc, hdr_crc); |
710 | 713 | ubi_dbg_dump_ec_hdr(ec_hdr); |
711 | - } | |
714 | + } else if (UBI_IO_DEBUG) | |
715 | + dbg_msg("bad EC header CRC at PEB %d, calculated " | |
716 | + "%#08x, read %#08x", pnum, crc, hdr_crc); | |
712 | 717 | return UBI_IO_BAD_EC_HDR; |
713 | 718 | } |
714 | 719 | |
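Before this series, UBI_IO_DEBUG simply forced verbose = 1, so debug builds escalated perfectly expected conditions (empty PEBs seen during attach, for instance) into warnings. Now the quiet path logs through dbg_msg() instead, as the repeated else-if branches above show. The shape of the pattern, factored into a sketch (the helper is hypothetical; the real code open-codes it per message):

/* Hypothetical helper showing the reporting split used in io.c. */
static void report_hdr_problem(int verbose, const char *msg, int pnum)
{
        if (verbose)
                ubi_warn("%s at PEB %d", msg, pnum); /* caller asked */
        else if (UBI_IO_DEBUG)
                dbg_msg("%s at PEB %d", msg, pnum);  /* debug build */
}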
... | ... | @@ -912,8 +917,6 @@ |
912 | 917 | |
913 | 918 | dbg_io("read VID header from PEB %d", pnum); |
914 | 919 | ubi_assert(pnum >= 0 && pnum < ubi->peb_count); |
915 | - if (UBI_IO_DEBUG) | |
916 | - verbose = 1; | |
917 | 920 | |
918 | 921 | p = (char *)vid_hdr - ubi->vid_hdr_shift; |
919 | 922 | err = ubi_io_read(ubi, p, pnum, ubi->vid_hdr_aloffset, |
... | ... | @@ -960,6 +963,9 @@ |
960 | 963 | if (verbose) |
961 | 964 | ubi_warn("no VID header found at PEB %d, " |
962 | 965 | "only 0xFF bytes", pnum); |
966 | + else if (UBI_IO_DEBUG) | |
967 | + dbg_msg("no VID header found at PEB %d, " | |
968 | + "only 0xFF bytes", pnum); | |
963 | 969 | return UBI_IO_PEB_FREE; |
964 | 970 | } |
965 | 971 | |
... | ... | @@ -971,7 +977,9 @@ |
971 | 977 | ubi_warn("bad magic number at PEB %d: %08x instead of " |
972 | 978 | "%08x", pnum, magic, UBI_VID_HDR_MAGIC); |
973 | 979 | ubi_dbg_dump_vid_hdr(vid_hdr); |
974 | - } | |
980 | + } else if (UBI_IO_DEBUG) | |
981 | + dbg_msg("bad magic number at PEB %d: %08x instead of " | |
982 | + "%08x", pnum, magic, UBI_VID_HDR_MAGIC); | |
975 | 983 | return UBI_IO_BAD_VID_HDR; |
976 | 984 | } |
977 | 985 | |
... | ... | @@ -983,7 +991,9 @@ |
983 | 991 | ubi_warn("bad CRC at PEB %d, calculated %#08x, " |
984 | 992 | "read %#08x", pnum, crc, hdr_crc); |
985 | 993 | ubi_dbg_dump_vid_hdr(vid_hdr); |
986 | - } | |
994 | + } else if (UBI_IO_DEBUG) | |
995 | + dbg_msg("bad CRC at PEB %d, calculated %#08x, " | |
996 | + "read %#08x", pnum, crc, hdr_crc); | |
987 | 997 | return UBI_IO_BAD_VID_HDR; |
988 | 998 | } |
989 | 999 | |
... | ... | @@ -1024,7 +1034,7 @@ |
1024 | 1034 | |
1025 | 1035 | err = paranoid_check_peb_ec_hdr(ubi, pnum); |
1026 | 1036 | if (err) |
1027 | - return err > 0 ? -EINVAL: err; | |
1037 | + return err > 0 ? -EINVAL : err; | |
1028 | 1038 | |
1029 | 1039 | vid_hdr->magic = cpu_to_be32(UBI_VID_HDR_MAGIC); |
1030 | 1040 | vid_hdr->version = UBI_VERSION; |
drivers/mtd/ubi/ubi.h
... | ... | @@ -74,6 +74,13 @@ |
74 | 74 | #define UBI_IO_RETRIES 3 |
75 | 75 | |
76 | 76 | /* |
77 | + * Length of the protection queue. The length is effectively equivalent to the | |
78 | + * number of (global) erase cycles for which PEBs are protected from the | |
79 | + * wear-leveling worker. | |
80 | + */ | |
81 | +#define UBI_PROT_QUEUE_LEN 10 | |
82 | + | |
83 | +/* | |
77 | 84 | * Error codes returned by the I/O sub-system. |
78 | 85 | * |
79 | 86 | * UBI_IO_PEB_EMPTY: the physical eraseblock is empty, i.e. it contains only |
... | ... | @@ -95,7 +102,8 @@ |
95 | 102 | |
96 | 103 | /** |
97 | 104 | * struct ubi_wl_entry - wear-leveling entry. |
98 | - * @rb: link in the corresponding RB-tree | |
105 | + * @u.rb: link in the corresponding (free/used) RB-tree | |
106 | + * @u.list: link in the protection queue | |
99 | 107 | * @ec: erase counter |
100 | 108 | * @pnum: physical eraseblock number |
101 | 109 | * |
... | ... | @@ -104,7 +112,10 @@ |
104 | 112 | * RB-trees. See WL sub-system for details. |
105 | 113 | */ |
106 | 114 | struct ubi_wl_entry { |
107 | - struct rb_node rb; | |
115 | + union { | |
116 | + struct rb_node rb; | |
117 | + struct list_head list; | |
118 | + } u; | |
108 | 119 | int ec; |
109 | 120 | int pnum; |
110 | 121 | }; |
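The two link types can share storage because a wear-leveling entry lives in exactly one container at any moment: one RB-tree (free, used, or scrub) or one protection-queue list, never both. Since UBI keeps one of these objects in RAM per physical eraseblock, the union also trims the per-PEB footprint to the larger of the two links rather than their sum. A small user-space model of the layout (stand-in types; sizes as on x86-64):

#include <stdio.h>

/* User-space stand-ins for the kernel's rb_node and list_head. */
struct rb_node_model { void *parent_color, *right, *left; };
struct list_head_model { void *next, *prev; };

struct wl_entry_model {
        union {                              /* in exactly one container */
                struct rb_node_model rb;     /* free/used/scrub tree */
                struct list_head_model list; /* protection queue */
        } u;
        int ec;
        int pnum;
};

int main(void)
{
        printf("link union: %zu bytes (rb %zu, list %zu)\n",
               sizeof(((struct wl_entry_model *)0)->u),
               sizeof(struct rb_node_model),
               sizeof(struct list_head_model));
        return 0;
}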
... | ... | @@ -288,7 +299,7 @@ |
288 | 299 | * @beb_rsvd_level: normal level of PEBs reserved for bad PEB handling |
289 | 300 | * |
290 | 301 | * @autoresize_vol_id: ID of the volume which has to be auto-resized at the end |
291 | - * of UBI ititializetion | |
302 | + * of UBI initialization | |
292 | 303 | * @vtbl_slots: how many slots are available in the volume table |
293 | 304 | * @vtbl_size: size of the volume table in bytes |
294 | 305 | * @vtbl: in-RAM volume table copy |
295 | 306 | |
... | ... | @@ -306,18 +317,17 @@ |
306 | 317 | * @used: RB-tree of used physical eraseblocks |
307 | 318 | * @free: RB-tree of free physical eraseblocks |
308 | 319 | * @scrub: RB-tree of physical eraseblocks which need scrubbing |
309 | - * @prot: protection trees | |
310 | - * @prot.pnum: protection tree indexed by physical eraseblock numbers | |
311 | - * @prot.aec: protection tree indexed by absolute erase counter value | |
312 | - * @wl_lock: protects the @used, @free, @prot, @lookuptbl, @abs_ec, @move_from, | |
313 | - * @move_to, @move_to_put @erase_pending, @wl_scheduled, and @works | |
314 | - * fields | |
320 | + * @pq: protection queue (contains physical eraseblocks which are temporarily | |
321 | + * protected from the wear-leveling worker) | |
322 | + * @pq_head: protection queue head | |
323 | + * @wl_lock: protects the @used, @free, @pq, @pq_head, @lookuptbl, @move_from, | |
324 | + * @move_to, @move_to_put, @erase_pending, @wl_scheduled and @works | |
325 | + * fields | |
315 | 326 | * @move_mutex: serializes eraseblock moves |
316 | - * @work_sem: sycnhronizes the WL worker with use tasks | |
327 | + * @work_sem: synchronizes the WL worker with user tasks | |
317 | 328 | * @wl_scheduled: non-zero if the wear-leveling was scheduled |
318 | 329 | * @lookuptbl: a table to quickly find a &struct ubi_wl_entry object for any |
319 | 330 | * physical eraseblock |
320 | - * @abs_ec: absolute erase counter | |
321 | 331 | * @move_from: physical eraseblock from where the data is being moved |
322 | 332 | * @move_to: physical eraseblock where the data is being moved to |
323 | 333 | * @move_to_put: if the "to" PEB was put |
324 | 334 | |
... | ... | @@ -351,11 +361,11 @@ |
351 | 361 | * |
352 | 362 | * @peb_buf1: a buffer of PEB size used for different purposes |
353 | 363 | * @peb_buf2: another buffer of PEB size used for different purposes |
354 | - * @buf_mutex: proptects @peb_buf1 and @peb_buf2 | |
364 | + * @buf_mutex: protects @peb_buf1 and @peb_buf2 | |
355 | 365 | * @ckvol_mutex: serializes static volume checking when opening |
356 | - * @mult_mutex: serializes operations on multiple volumes, like re-nameing | |
366 | + * @mult_mutex: serializes operations on multiple volumes, like re-naming | |
357 | 367 | * @dbg_peb_buf: buffer of PEB size used for debugging |
358 | - * @dbg_buf_mutex: proptects @dbg_peb_buf | |
368 | + * @dbg_buf_mutex: protects @dbg_peb_buf | |
359 | 369 | */ |
360 | 370 | struct ubi_device { |
361 | 371 | struct cdev cdev; |
362 | 372 | |
... | ... | @@ -392,16 +402,13 @@ |
392 | 402 | struct rb_root used; |
393 | 403 | struct rb_root free; |
394 | 404 | struct rb_root scrub; |
395 | - struct { | |
396 | - struct rb_root pnum; | |
397 | - struct rb_root aec; | |
398 | - } prot; | |
405 | + struct list_head pq[UBI_PROT_QUEUE_LEN]; | |
406 | + int pq_head; | |
399 | 407 | spinlock_t wl_lock; |
400 | 408 | struct mutex move_mutex; |
401 | 409 | struct rw_semaphore work_sem; |
402 | 410 | int wl_scheduled; |
403 | 411 | struct ubi_wl_entry **lookuptbl; |
404 | - unsigned long long abs_ec; | |
405 | 412 | struct ubi_wl_entry *move_from; |
406 | 413 | struct ubi_wl_entry *move_to; |
407 | 414 | int move_to_put; |
drivers/mtd/ubi/wl.c
... | ... | @@ -22,7 +22,7 @@ |
22 | 22 | * UBI wear-leveling sub-system. |
23 | 23 | * |
24 | 24 | * This sub-system is responsible for wear-leveling. It works in terms of |
25 | - * physical* eraseblocks and erase counters and knows nothing about logical | |
25 | + * physical eraseblocks and erase counters and knows nothing about logical | |
26 | 26 | * eraseblocks, volumes, etc. From this sub-system's perspective all physical |
27 | 27 | * eraseblocks are of two types - used and free. Used physical eraseblocks are |
28 | 28 | * those that were "get" by the 'ubi_wl_get_peb()' function, and free physical |
29 | 29 | |
... | ... | @@ -55,9 +55,40 @@ |
55 | 55 | * |
56 | 56 | * As it was said, for the UBI sub-system all physical eraseblocks are either |
57 | 57 | * "free" or "used". Free eraseblock are kept in the @wl->free RB-tree, while |
58 | - * used eraseblocks are kept in a set of different RB-trees: @wl->used, | |
59 | - * @wl->prot.pnum, @wl->prot.aec, and @wl->scrub. | |
58 | + * used eraseblocks are kept in @wl->used or @wl->scrub RB-trees, or | |
59 | + * (temporarily) in the @wl->pq queue. | |
60 | 60 | * |
61 | + * When the WL sub-system returns a physical eraseblock, the physical | |
62 | + * eraseblock is protected from being moved for some "time". For this reason, | |
63 | + * the physical eraseblock is not directly moved from the @wl->free tree to the | |
64 | + * @wl->used tree. There is a protection queue in between where this | |
65 | + * physical eraseblock is temporarily stored (@wl->pq). | |
66 | + * | |
67 | + * All this protection stuff is needed because: | |
68 | + * o we don't want to move physical eraseblocks just after we have given them | |
69 | + * to the user; instead, we first want to let users fill them up with data; | |
70 | + * | |
71 | + * o there is a chance that the user will put the physical eraseblock very | |
72 | + * soon, so it makes sense not to move it for some time, but wait; this is | |
73 | + * especially important in case of "short term" physical eraseblocks. | |
74 | + * | |
75 | + * Physical eraseblocks stay protected only for a limited time. But the "time" is | |
76 | + * measured in erase cycles in this case. This is implemented with help of the | |
77 | + * protection queue. Eraseblocks are put to the tail of this queue when they | |
78 | + * are returned by the 'ubi_wl_get_peb()', and eraseblocks are removed from the | |
79 | + * head of the queue on each erase operation (for any eraseblock). So the | |
80 | + * length of the queue defines how may (global) erase cycles PEBs are protected. | |
81 | + * | |
82 | + * To put it differently, each physical eraseblock has 2 main states: free and | |
83 | + * used. The former state corresponds to the @wl->free tree. The latter state | |
84 | + * is split up on several sub-states: | |
85 | + * o the WL movement is allowed (@wl->used tree); | |
86 | + * o the WL movement is temporarily prohibited (@wl->pq queue); | |
87 | + * o scrubbing is needed (@wl->scrub tree). | |
88 | + * | |
89 | + * Depending on the sub-state, wear-leveling entries of the used physical | |
90 | + * eraseblocks may be kept in one of those structures. | |
91 | + * | |
61 | 92 | * Note, in this implementation, we keep a small in-RAM object for each physical |
62 | 93 | * eraseblock. This is surely not a scalable solution. But it appears to be good |
63 | 94 | * enough for moderately large flashes and it is simple. In future, one may |
... | ... | @@ -70,9 +101,6 @@ |
70 | 101 | * target PEB, we pick a PEB with the highest EC if our PEB is "old" and we |
71 | 102 | * pick target PEB with an average EC if our PEB is not very "old". This is a |
72 | 103 | * room for future re-works of the WL sub-system. |
73 | - * | |
74 | - * Note: the stuff with protection trees looks too complex and is difficult to | |
75 | - * understand. Should be fixed. | |
76 | 104 | */ |
77 | 105 | |
78 | 106 | #include <linux/slab.h> |
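The new comment block above fully specifies the queue mechanics, and the code implementing them (prot_queue_add(), serve_prot_queue()) appears further down in this diff. As a sanity check of the arithmetic, a tiny standalone model of the wheel, indices only, assuming the queue length of 10 from ubi.h:

#include <stdio.h>

#define PROT_QUEUE_LEN 10       /* mirrors UBI_PROT_QUEUE_LEN */

int main(void)
{
        int pq_head = 0, cycle, served;

        /* prot_queue_add(): a fresh PEB lands in the tail slot,
         * one position behind the head, modulo the queue length. */
        int slot = (pq_head - 1 + PROT_QUEUE_LEN) % PROT_QUEUE_LEN;

        /* serve_prot_queue(): every erase empties the head slot and
         * advances the head; the PEB is released once the head
         * reaches its slot. */
        for (cycle = 1; ; cycle++) {
                served = pq_head;
                pq_head = (pq_head + 1) % PROT_QUEUE_LEN;
                if (served == slot)
                        break;
        }
        printf("PEB stayed protected for %d erase cycles\n", cycle);
        return 0;
}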
... | ... | @@ -85,14 +113,6 @@ |
85 | 113 | #define WL_RESERVED_PEBS 1 |
86 | 114 | |
87 | 115 | /* |
88 | - * How many erase cycles are short term, unknown, and long term physical | |
89 | - * eraseblocks protected. | |
90 | - */ | |
91 | -#define ST_PROTECTION 16 | |
92 | -#define U_PROTECTION 10 | |
93 | -#define LT_PROTECTION 4 | |
94 | - | |
95 | -/* | |
96 | 116 | * Maximum difference between two erase counters. If this threshold is |
97 | 117 | * exceeded, the WL sub-system starts moving data from used physical |
98 | 118 | * eraseblocks with low erase counter to free physical eraseblocks with high |
99 | 119 | |
... | ... | @@ -120,64 +140,9 @@ |
120 | 140 | #define WL_MAX_FAILURES 32 |
121 | 141 | |
122 | 142 | /** |
123 | - * struct ubi_wl_prot_entry - PEB protection entry. | |
124 | - * @rb_pnum: link in the @wl->prot.pnum RB-tree | |
125 | - * @rb_aec: link in the @wl->prot.aec RB-tree | |
126 | - * @abs_ec: the absolute erase counter value when the protection ends | |
127 | - * @e: the wear-leveling entry of the physical eraseblock under protection | |
128 | - * | |
129 | - * When the WL sub-system returns a physical eraseblock, the physical | |
130 | - * eraseblock is protected from being moved for some "time". For this reason, | |
131 | - * the physical eraseblock is not directly moved from the @wl->free tree to the | |
132 | - * @wl->used tree. There is one more tree in between where this physical | |
133 | - * eraseblock is temporarily stored (@wl->prot). | |
134 | - * | |
135 | - * All this protection stuff is needed because: | |
136 | - * o we don't want to move physical eraseblocks just after we have given them | |
137 | - * to the user; instead, we first want to let users fill them up with data; | |
138 | - * | |
139 | - * o there is a chance that the user will put the physical eraseblock very | |
140 | - * soon, so it makes sense not to move it for some time, but wait; this is | |
141 | - * especially important in case of "short term" physical eraseblocks. | |
142 | - * | |
143 | - * Physical eraseblocks stay protected only for limited time. But the "time" is | |
144 | - * measured in erase cycles in this case. This is implemented with help of the | |
145 | - * absolute erase counter (@wl->abs_ec). When it reaches certain value, the | |
146 | - * physical eraseblocks are moved from the protection trees (@wl->prot.*) to | |
147 | - * the @wl->used tree. | |
148 | - * | |
149 | - * Protected physical eraseblocks are searched by physical eraseblock number | |
150 | - * (when they are put) and by the absolute erase counter (to check if it is | |
151 | - * time to move them to the @wl->used tree). So there are actually 2 RB-trees | |
152 | - * storing the protected physical eraseblocks: @wl->prot.pnum and | |
153 | - * @wl->prot.aec. They are referred to as the "protection" trees. The | |
154 | - * first one is indexed by the physical eraseblock number. The second one is | |
155 | - * indexed by the absolute erase counter. Both trees store | |
156 | - * &struct ubi_wl_prot_entry objects. | |
157 | - * | |
158 | - * Each physical eraseblock has 2 main states: free and used. The former state | |
159 | - * corresponds to the @wl->free tree. The latter state is split up on several | |
160 | - * sub-states: | |
161 | - * o the WL movement is allowed (@wl->used tree); | |
162 | - * o the WL movement is temporarily prohibited (@wl->prot.pnum and | |
163 | - * @wl->prot.aec trees); | |
164 | - * o scrubbing is needed (@wl->scrub tree). | |
165 | - * | |
166 | - * Depending on the sub-state, wear-leveling entries of the used physical | |
167 | - * eraseblocks may be kept in one of those trees. | |
168 | - */ | |
169 | -struct ubi_wl_prot_entry { | |
170 | - struct rb_node rb_pnum; | |
171 | - struct rb_node rb_aec; | |
172 | - unsigned long long abs_ec; | |
173 | - struct ubi_wl_entry *e; | |
174 | -}; | |
175 | - | |
176 | -/** | |
177 | 143 | * struct ubi_work - UBI work description data structure. |
178 | 144 | * @list: a link in the list of pending works |
179 | 145 | * @func: worker function |
180 | - * @priv: private data of the worker function | |
181 | 146 | * @e: physical eraseblock to erase |
182 | 147 | * @torture: if the physical eraseblock has to be tortured |
183 | 148 | * |
184 | 149 | |
... | ... | @@ -198,9 +163,11 @@ |
198 | 163 | static int paranoid_check_ec(struct ubi_device *ubi, int pnum, int ec); |
199 | 164 | static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e, |
200 | 165 | struct rb_root *root); |
166 | +static int paranoid_check_in_pq(struct ubi_device *ubi, struct ubi_wl_entry *e); | |
201 | 167 | #else |
202 | 168 | #define paranoid_check_ec(ubi, pnum, ec) 0 |
203 | 169 | #define paranoid_check_in_wl_tree(e, root) |
170 | +#define paranoid_check_in_pq(ubi, e) 0 | |
204 | 171 | #endif |
205 | 172 | |
206 | 173 | /** |
... | ... | @@ -220,7 +187,7 @@ |
220 | 187 | struct ubi_wl_entry *e1; |
221 | 188 | |
222 | 189 | parent = *p; |
223 | - e1 = rb_entry(parent, struct ubi_wl_entry, rb); | |
190 | + e1 = rb_entry(parent, struct ubi_wl_entry, u.rb); | |
224 | 191 | |
225 | 192 | if (e->ec < e1->ec) |
226 | 193 | p = &(*p)->rb_left; |
... | ... | @@ -235,8 +202,8 @@ |
235 | 202 | } |
236 | 203 | } |
237 | 204 | |
238 | - rb_link_node(&e->rb, parent, p); | |
239 | - rb_insert_color(&e->rb, root); | |
205 | + rb_link_node(&e->u.rb, parent, p); | |
206 | + rb_insert_color(&e->u.rb, root); | |
240 | 207 | } |
241 | 208 | |
242 | 209 | /** |
... | ... | @@ -331,7 +298,7 @@ |
331 | 298 | while (p) { |
332 | 299 | struct ubi_wl_entry *e1; |
333 | 300 | |
334 | - e1 = rb_entry(p, struct ubi_wl_entry, rb); | |
301 | + e1 = rb_entry(p, struct ubi_wl_entry, u.rb); | |
335 | 302 | |
336 | 303 | if (e->pnum == e1->pnum) { |
337 | 304 | ubi_assert(e == e1); |
338 | 305 | |
... | ... | @@ -355,50 +322,24 @@ |
355 | 322 | } |
356 | 323 | |
357 | 324 | /** |
358 | - * prot_tree_add - add physical eraseblock to protection trees. | |
325 | + * prot_queue_add - add physical eraseblock to the protection queue. | |
359 | 326 | * @ubi: UBI device description object |
360 | 327 | * @e: the physical eraseblock to add |
361 | - * @pe: protection entry object to use | |
362 | - * @abs_ec: absolute erase counter value when this physical eraseblock has | |
363 | - * to be removed from the protection trees. | |
364 | 328 | * |
365 | - * @wl->lock has to be locked. | |
329 | + * This function adds @e to the tail of the protection queue @ubi->pq, where | |
330 | + * @e will stay for %UBI_PROT_QUEUE_LEN erase operations and will be | |
331 | + * temporarily protected from the wear-leveling worker. Note, @wl->lock has to | |
332 | + * be locked. | |
366 | 333 | */ |
367 | -static void prot_tree_add(struct ubi_device *ubi, struct ubi_wl_entry *e, | |
368 | - struct ubi_wl_prot_entry *pe, int abs_ec) | |
334 | +static void prot_queue_add(struct ubi_device *ubi, struct ubi_wl_entry *e) | |
369 | 335 | { |
370 | - struct rb_node **p, *parent = NULL; | |
371 | - struct ubi_wl_prot_entry *pe1; | |
336 | + int pq_tail = ubi->pq_head - 1; | |
372 | 337 | |
373 | - pe->e = e; | |
374 | - pe->abs_ec = ubi->abs_ec + abs_ec; | |
375 | - | |
376 | - p = &ubi->prot.pnum.rb_node; | |
377 | - while (*p) { | |
378 | - parent = *p; | |
379 | - pe1 = rb_entry(parent, struct ubi_wl_prot_entry, rb_pnum); | |
380 | - | |
381 | - if (e->pnum < pe1->e->pnum) | |
382 | - p = &(*p)->rb_left; | |
383 | - else | |
384 | - p = &(*p)->rb_right; | |
385 | - } | |
386 | - rb_link_node(&pe->rb_pnum, parent, p); | |
387 | - rb_insert_color(&pe->rb_pnum, &ubi->prot.pnum); | |
388 | - | |
389 | - p = &ubi->prot.aec.rb_node; | |
390 | - parent = NULL; | |
391 | - while (*p) { | |
392 | - parent = *p; | |
393 | - pe1 = rb_entry(parent, struct ubi_wl_prot_entry, rb_aec); | |
394 | - | |
395 | - if (pe->abs_ec < pe1->abs_ec) | |
396 | - p = &(*p)->rb_left; | |
397 | - else | |
398 | - p = &(*p)->rb_right; | |
399 | - } | |
400 | - rb_link_node(&pe->rb_aec, parent, p); | |
401 | - rb_insert_color(&pe->rb_aec, &ubi->prot.aec); | |
338 | + if (pq_tail < 0) | |
339 | + pq_tail = UBI_PROT_QUEUE_LEN - 1; | |
340 | + ubi_assert(pq_tail >= 0 && pq_tail < UBI_PROT_QUEUE_LEN); | |
341 | + list_add_tail(&e->u.list, &ubi->pq[pq_tail]); | |
342 | + dbg_wl("added PEB %d EC %d to the protection queue", e->pnum, e->ec); | |
402 | 343 | } |
403 | 344 | |
404 | 345 | /** |
405 | 346 | |
... | ... | @@ -414,14 +355,14 @@ |
414 | 355 | struct rb_node *p; |
415 | 356 | struct ubi_wl_entry *e; |
416 | 357 | |
417 | - e = rb_entry(rb_first(root), struct ubi_wl_entry, rb); | |
358 | + e = rb_entry(rb_first(root), struct ubi_wl_entry, u.rb); | |
418 | 359 | max += e->ec; |
419 | 360 | |
420 | 361 | p = root->rb_node; |
421 | 362 | while (p) { |
422 | 363 | struct ubi_wl_entry *e1; |
423 | 364 | |
424 | - e1 = rb_entry(p, struct ubi_wl_entry, rb); | |
365 | + e1 = rb_entry(p, struct ubi_wl_entry, u.rb); | |
425 | 366 | if (e1->ec >= max) |
426 | 367 | p = p->rb_left; |
427 | 368 | else { |
428 | 369 | |
... | ... | @@ -443,17 +384,12 @@ |
443 | 384 | */ |
444 | 385 | int ubi_wl_get_peb(struct ubi_device *ubi, int dtype) |
445 | 386 | { |
446 | - int err, protect, medium_ec; | |
387 | + int err, medium_ec; | |
447 | 388 | struct ubi_wl_entry *e, *first, *last; |
448 | - struct ubi_wl_prot_entry *pe; | |
449 | 389 | |
450 | 390 | ubi_assert(dtype == UBI_LONGTERM || dtype == UBI_SHORTTERM || |
451 | 391 | dtype == UBI_UNKNOWN); |
452 | 392 | |
453 | - pe = kmalloc(sizeof(struct ubi_wl_prot_entry), GFP_NOFS); | |
454 | - if (!pe) | |
455 | - return -ENOMEM; | |
456 | - | |
457 | 393 | retry: |
458 | 394 | spin_lock(&ubi->wl_lock); |
459 | 395 | if (!ubi->free.rb_node) { |
460 | 396 | |
... | ... | @@ -461,16 +397,13 @@ |
461 | 397 | ubi_assert(list_empty(&ubi->works)); |
462 | 398 | ubi_err("no free eraseblocks"); |
463 | 399 | spin_unlock(&ubi->wl_lock); |
464 | - kfree(pe); | |
465 | 400 | return -ENOSPC; |
466 | 401 | } |
467 | 402 | spin_unlock(&ubi->wl_lock); |
468 | 403 | |
469 | 404 | err = produce_free_peb(ubi); |
470 | - if (err < 0) { | |
471 | - kfree(pe); | |
405 | + if (err < 0) | |
472 | 406 | return err; |
473 | - } | |
474 | 407 | goto retry; |
475 | 408 | } |
476 | 409 | |
... | ... | @@ -483,7 +416,6 @@ |
483 | 416 | * %WL_FREE_MAX_DIFF. |
484 | 417 | */ |
485 | 418 | e = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); |
486 | - protect = LT_PROTECTION; | |
487 | 419 | break; |
488 | 420 | case UBI_UNKNOWN: |
489 | 421 | /* |
490 | 422 | |
... | ... | @@ -492,81 +424,63 @@ |
492 | 424 | * eraseblock with erase counter greater or equivalent than the |
493 | 425 | * lowest erase counter plus %WL_FREE_MAX_DIFF. |
494 | 426 | */ |
495 | - first = rb_entry(rb_first(&ubi->free), struct ubi_wl_entry, rb); | |
496 | - last = rb_entry(rb_last(&ubi->free), struct ubi_wl_entry, rb); | |
427 | + first = rb_entry(rb_first(&ubi->free), struct ubi_wl_entry, | |
428 | + u.rb); | |
429 | + last = rb_entry(rb_last(&ubi->free), struct ubi_wl_entry, u.rb); | |
497 | 430 | |
498 | 431 | if (last->ec - first->ec < WL_FREE_MAX_DIFF) |
499 | 432 | e = rb_entry(ubi->free.rb_node, |
500 | - struct ubi_wl_entry, rb); | |
433 | + struct ubi_wl_entry, u.rb); | |
501 | 434 | else { |
502 | 435 | medium_ec = (first->ec + WL_FREE_MAX_DIFF)/2; |
503 | 436 | e = find_wl_entry(&ubi->free, medium_ec); |
504 | 437 | } |
505 | - protect = U_PROTECTION; | |
506 | 438 | break; |
507 | 439 | case UBI_SHORTTERM: |
508 | 440 | /* |
509 | 441 | * For short term data we pick a physical eraseblock with the |
510 | 442 | * lowest erase counter as we expect it will be erased soon. |
511 | 443 | */ |
512 | - e = rb_entry(rb_first(&ubi->free), struct ubi_wl_entry, rb); | |
513 | - protect = ST_PROTECTION; | |
444 | + e = rb_entry(rb_first(&ubi->free), struct ubi_wl_entry, u.rb); | |
514 | 445 | break; |
515 | 446 | default: |
516 | - protect = 0; | |
517 | - e = NULL; | |
518 | 447 | BUG(); |
519 | 448 | } |
520 | 449 | |
450 | + paranoid_check_in_wl_tree(e, &ubi->free); | |
451 | + | |
521 | 452 | /* |
522 | - * Move the physical eraseblock to the protection trees where it will | |
453 | + * Move the physical eraseblock to the protection queue where it will | |
523 | 454 | * be protected from being moved for some time. |
524 | 455 | */ |
525 | - paranoid_check_in_wl_tree(e, &ubi->free); | |
526 | - rb_erase(&e->rb, &ubi->free); | |
527 | - prot_tree_add(ubi, e, pe, protect); | |
528 | - | |
529 | - dbg_wl("PEB %d EC %d, protection %d", e->pnum, e->ec, protect); | |
456 | + rb_erase(&e->u.rb, &ubi->free); | |
457 | + dbg_wl("PEB %d EC %d", e->pnum, e->ec); | |
458 | + prot_queue_add(ubi, e); | |
530 | 459 | spin_unlock(&ubi->wl_lock); |
531 | - | |
532 | 460 | return e->pnum; |
533 | 461 | } |
534 | 462 | |
535 | 463 | /** |
536 | - * prot_tree_del - remove a physical eraseblock from the protection trees | |
464 | + * prot_queue_del - remove a physical eraseblock from the protection queue. | |
537 | 465 | * @ubi: UBI device description object |
538 | 466 | * @pnum: the physical eraseblock to remove |
539 | 467 | * |
540 | - * This function returns PEB @pnum from the protection trees and returns zero | |
541 | - * in case of success and %-ENODEV if the PEB was not found in the protection | |
542 | - * trees. | |
468 | + * This function deletes PEB @pnum from the protection queue and returns zero | |
469 | + * in case of success and %-ENODEV if the PEB was not found. | |
543 | 470 | */ |
544 | -static int prot_tree_del(struct ubi_device *ubi, int pnum) | |
471 | +static int prot_queue_del(struct ubi_device *ubi, int pnum) | |
545 | 472 | { |
546 | - struct rb_node *p; | |
547 | - struct ubi_wl_prot_entry *pe = NULL; | |
473 | + struct ubi_wl_entry *e; | |
548 | 474 | |
549 | - p = ubi->prot.pnum.rb_node; | |
550 | - while (p) { | |
475 | + e = ubi->lookuptbl[pnum]; | |
476 | + if (!e) | |
477 | + return -ENODEV; | |
551 | 478 | |
552 | - pe = rb_entry(p, struct ubi_wl_prot_entry, rb_pnum); | |
479 | + if (paranoid_check_in_pq(ubi, e)) | |
480 | + return -ENODEV; | |
553 | 481 | |
554 | - if (pnum == pe->e->pnum) | |
555 | - goto found; | |
556 | - | |
557 | - if (pnum < pe->e->pnum) | |
558 | - p = p->rb_left; | |
559 | - else | |
560 | - p = p->rb_right; | |
561 | - } | |
562 | - | |
563 | - return -ENODEV; | |
564 | - | |
565 | -found: | |
566 | - ubi_assert(pe->e->pnum == pnum); | |
567 | - rb_erase(&pe->rb_aec, &ubi->prot.aec); | |
568 | - rb_erase(&pe->rb_pnum, &ubi->prot.pnum); | |
569 | - kfree(pe); | |
482 | + list_del(&e->u.list); | |
483 | + dbg_wl("deleted PEB %d from the protection queue", e->pnum); | |
570 | 484 | return 0; |
571 | 485 | } |
572 | 486 | |
573 | 487 | |
... | ... | @@ -632,47 +546,47 @@ |
632 | 546 | } |
633 | 547 | |
634 | 548 | /** |
635 | - * check_protection_over - check if it is time to stop protecting some PEBs. | |
549 | + * serve_prot_queue - check if it is time to stop protecting PEBs. | |
636 | 550 | * @ubi: UBI device description object |
637 | 551 | * |
638 | - * This function is called after each erase operation, when the absolute erase | |
639 | - * counter is incremented, to check if some physical eraseblock have not to be | |
640 | - * protected any longer. These physical eraseblocks are moved from the | |
641 | - * protection trees to the used tree. | |
552 | + * This function is called after each erase operation and removes PEBs from the | |
553 | + * head of the protection queue. These PEBs have been protected for long enough | |
554 | + * and should be moved to the used tree. | |
642 | 555 | */ |
643 | -static void check_protection_over(struct ubi_device *ubi) | |
556 | +static void serve_prot_queue(struct ubi_device *ubi) | |
644 | 557 | { |
645 | - struct ubi_wl_prot_entry *pe; | |
558 | + struct ubi_wl_entry *e, *tmp; | |
559 | + int count; | |
646 | 560 | |
647 | 561 | /* |
648 | 562 | * There may be several protected physical eraseblock to remove, |
649 | 563 | * process them all. |
650 | 564 | */ |
651 | - while (1) { | |
652 | - spin_lock(&ubi->wl_lock); | |
653 | - if (!ubi->prot.aec.rb_node) { | |
654 | - spin_unlock(&ubi->wl_lock); | |
655 | - break; | |
656 | - } | |
565 | +repeat: | |
566 | + count = 0; | |
567 | + spin_lock(&ubi->wl_lock); | |
568 | + list_for_each_entry_safe(e, tmp, &ubi->pq[ubi->pq_head], u.list) { | |
569 | + dbg_wl("PEB %d EC %d protection over, move to used tree", | |
570 | + e->pnum, e->ec); | |
657 | 571 | |
658 | - pe = rb_entry(rb_first(&ubi->prot.aec), | |
659 | - struct ubi_wl_prot_entry, rb_aec); | |
660 | - | |
661 | - if (pe->abs_ec > ubi->abs_ec) { | |
572 | + list_del(&e->u.list); | |
573 | + wl_tree_add(e, &ubi->used); | |
574 | + if (count++ > 32) { | |
575 | + /* | |
576 | + * Let's be nice and avoid holding the spinlock for | |
577 | + * too long. | |
578 | + */ | |
662 | 579 | spin_unlock(&ubi->wl_lock); |
663 | - break; | |
580 | + cond_resched(); | |
581 | + goto repeat; | |
664 | 582 | } |
665 | - | |
666 | - dbg_wl("PEB %d protection over, abs_ec %llu, PEB abs_ec %llu", | |
667 | - pe->e->pnum, ubi->abs_ec, pe->abs_ec); | |
668 | - rb_erase(&pe->rb_aec, &ubi->prot.aec); | |
669 | - rb_erase(&pe->rb_pnum, &ubi->prot.pnum); | |
670 | - wl_tree_add(pe->e, &ubi->used); | |
671 | - spin_unlock(&ubi->wl_lock); | |
672 | - | |
673 | - kfree(pe); | |
674 | - cond_resched(); | |
675 | 583 | } |
584 | + | |
585 | + ubi->pq_head += 1; | |
586 | + if (ubi->pq_head == UBI_PROT_QUEUE_LEN) | |
587 | + ubi->pq_head = 0; | |
588 | + ubi_assert(ubi->pq_head >= 0 && ubi->pq_head < UBI_PROT_QUEUE_LEN); | |
589 | + spin_unlock(&ubi->wl_lock); | |
676 | 590 | } |
677 | 591 | |
678 | 592 | /** |
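One detail worth noting in serve_prot_queue() above: rather than draining the whole head slot under wl_lock, it releases the lock every 32 entries, reschedules, and restarts, keeping spinlock hold times bounded. A generic user-space model of that idiom (a pthread mutex stands in for the spinlock; work_pending() and process_one() are hypothetical):

#include <pthread.h>
#include <sched.h>

extern int work_pending(void);  /* hypothetical: items left? */
extern void process_one(void);  /* hypothetical: handle one item */

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

/* Drain a shared queue in bounded batches so the lock is never
 * held for long stretches. */
void drain_in_batches(void)
{
        int count;

repeat:
        count = 0;
        pthread_mutex_lock(&lock);
        while (work_pending()) {
                process_one();
                if (count++ > 32) {
                        pthread_mutex_unlock(&lock);
                        sched_yield();  /* akin to cond_resched() */
                        goto repeat;
                }
        }
        pthread_mutex_unlock(&lock);
}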
... | ... | @@ -680,8 +594,8 @@ |
680 | 594 | * @ubi: UBI device description object |
681 | 595 | * @wrk: the work to schedule |
682 | 596 | * |
683 | - * This function enqueues a work defined by @wrk to the tail of the pending | |
684 | - * works list. | |
597 | + * This function adds a work defined by @wrk to the tail of the pending works | |
598 | + * list. | |
685 | 599 | */ |
686 | 600 | static void schedule_ubi_work(struct ubi_device *ubi, struct ubi_work *wrk) |
687 | 601 | { |
... | ... | @@ -739,13 +653,11 @@ |
739 | 653 | static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, |
740 | 654 | int cancel) |
741 | 655 | { |
742 | - int err, put = 0, scrubbing = 0, protect = 0; | |
743 | - struct ubi_wl_prot_entry *uninitialized_var(pe); | |
656 | + int err, scrubbing = 0, torture = 0; | |
744 | 657 | struct ubi_wl_entry *e1, *e2; |
745 | 658 | struct ubi_vid_hdr *vid_hdr; |
746 | 659 | |
747 | 660 | kfree(wrk); |
748 | - | |
749 | 661 | if (cancel) |
750 | 662 | return 0; |
751 | 663 | |
... | ... | @@ -781,7 +693,7 @@ |
781 | 693 | * highly worn-out free physical eraseblock. If the erase |
782 | 694 | * counters differ much enough, start wear-leveling. |
783 | 695 | */ |
784 | - e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, rb); | |
696 | + e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb); | |
785 | 697 | e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); |
786 | 698 | |
787 | 699 | if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) { |
788 | 700 | |
... | ... | @@ -790,21 +702,21 @@ |
790 | 702 | goto out_cancel; |
791 | 703 | } |
792 | 704 | paranoid_check_in_wl_tree(e1, &ubi->used); |
793 | - rb_erase(&e1->rb, &ubi->used); | |
705 | + rb_erase(&e1->u.rb, &ubi->used); | |
794 | 706 | dbg_wl("move PEB %d EC %d to PEB %d EC %d", |
795 | 707 | e1->pnum, e1->ec, e2->pnum, e2->ec); |
796 | 708 | } else { |
797 | 709 | /* Perform scrubbing */ |
798 | 710 | scrubbing = 1; |
799 | - e1 = rb_entry(rb_first(&ubi->scrub), struct ubi_wl_entry, rb); | |
711 | + e1 = rb_entry(rb_first(&ubi->scrub), struct ubi_wl_entry, u.rb); | |
800 | 712 | e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); |
801 | 713 | paranoid_check_in_wl_tree(e1, &ubi->scrub); |
802 | - rb_erase(&e1->rb, &ubi->scrub); | |
714 | + rb_erase(&e1->u.rb, &ubi->scrub); | |
803 | 715 | dbg_wl("scrub PEB %d to PEB %d", e1->pnum, e2->pnum); |
804 | 716 | } |
805 | 717 | |
806 | 718 | paranoid_check_in_wl_tree(e2, &ubi->free); |
807 | - rb_erase(&e2->rb, &ubi->free); | |
719 | + rb_erase(&e2->u.rb, &ubi->free); | |
808 | 720 | ubi->move_from = e1; |
809 | 721 | ubi->move_to = e2; |
810 | 722 | spin_unlock(&ubi->wl_lock); |
811 | 723 | |
... | ... | @@ -844,46 +756,67 @@ |
844 | 756 | |
845 | 757 | err = ubi_eba_copy_leb(ubi, e1->pnum, e2->pnum, vid_hdr); |
846 | 758 | if (err) { |
847 | - | |
759 | + if (err == -EAGAIN) | |
760 | + goto out_not_moved; | |
848 | 761 | if (err < 0) |
849 | 762 | goto out_error; |
850 | - if (err == 1) | |
763 | + if (err == 2) { | |
764 | + /* Target PEB write error, torture it */ | |
765 | + torture = 1; | |
851 | 766 | goto out_not_moved; |
767 | + } | |
852 | 768 | |
853 | 769 | /* |
854 | - * For some reason the LEB was not moved - it might be because | |
855 | - * the volume is being deleted. We should prevent this PEB from | |
856 | - * being selected for wear-levelling movement for some "time", | |
857 | - * so put it to the protection tree. | |
770 | + * The LEB has not been moved because the volume is being | |
771 | + * deleted or the PEB has been put meanwhile. We should prevent | |
772 | + * this PEB from being selected for wear-leveling movement | |
773 | + * again, so put it to the protection queue. | |
858 | 774 | */ |
859 | 775 | |
860 | - dbg_wl("cancelled moving PEB %d", e1->pnum); | |
861 | - pe = kmalloc(sizeof(struct ubi_wl_prot_entry), GFP_NOFS); | |
862 | - if (!pe) { | |
863 | - err = -ENOMEM; | |
864 | - goto out_error; | |
865 | - } | |
776 | + dbg_wl("canceled moving PEB %d", e1->pnum); | |
777 | + ubi_assert(err == 1); | |
866 | 778 | |
867 | - protect = 1; | |
779 | + ubi_free_vid_hdr(ubi, vid_hdr); | |
780 | + vid_hdr = NULL; | |
781 | + | |
782 | + spin_lock(&ubi->wl_lock); | |
783 | + prot_queue_add(ubi, e1); | |
784 | + ubi_assert(!ubi->move_to_put); | |
785 | + ubi->move_from = ubi->move_to = NULL; | |
786 | + ubi->wl_scheduled = 0; | |
787 | + spin_unlock(&ubi->wl_lock); | |
788 | + | |
789 | + e1 = NULL; | |
790 | + err = schedule_erase(ubi, e2, 0); | |
791 | + if (err) | |
792 | + goto out_error; | |
793 | + mutex_unlock(&ubi->move_mutex); | |
794 | + return 0; | |
868 | 795 | } |
869 | 796 | |
797 | + /* The PEB has been successfully moved */ | |
870 | 798 | ubi_free_vid_hdr(ubi, vid_hdr); |
871 | - if (scrubbing && !protect) | |
799 | + vid_hdr = NULL; | |
800 | + if (scrubbing) | |
872 | 801 | ubi_msg("scrubbed PEB %d, data moved to PEB %d", |
873 | 802 | e1->pnum, e2->pnum); |
874 | 803 | |
875 | 804 | spin_lock(&ubi->wl_lock); |
876 | - if (protect) | |
877 | - prot_tree_add(ubi, e1, pe, protect); | |
878 | - if (!ubi->move_to_put) | |
805 | + if (!ubi->move_to_put) { | |
879 | 806 | wl_tree_add(e2, &ubi->used); |
880 | - else | |
881 | - put = 1; | |
807 | + e2 = NULL; | |
808 | + } | |
882 | 809 | ubi->move_from = ubi->move_to = NULL; |
883 | 810 | ubi->move_to_put = ubi->wl_scheduled = 0; |
884 | 811 | spin_unlock(&ubi->wl_lock); |
885 | 812 | |
886 | - if (put) { | |
813 | + err = schedule_erase(ubi, e1, 0); | |
814 | + if (err) { | |
815 | + e1 = NULL; | |
816 | + goto out_error; | |
817 | + } | |
818 | + | |
819 | + if (e2) { | |
887 | 820 | /* |
888 | 821 | * Well, the target PEB was put meanwhile, schedule it for |
889 | 822 | * erasure. |
... | ... | @@ -894,13 +827,6 @@ |
894 | 827 | goto out_error; |
895 | 828 | } |
896 | 829 | |
897 | - if (!protect) { | |
898 | - err = schedule_erase(ubi, e1, 0); | |
899 | - if (err) | |
900 | - goto out_error; | |
901 | - } | |
902 | - | |
903 | - | |
904 | 830 | dbg_wl("done"); |
905 | 831 | mutex_unlock(&ubi->move_mutex); |
906 | 832 | return 0; |
907 | 833 | |
... | ... | @@ -908,20 +834,24 @@ |
908 | 834 | /* |
909 | 835 | * For some reason the LEB was not moved, might be an error, might be |
910 | 836 | * something else. @e1 was not changed, so return it back. @e2 might |
911 | - * be changed, schedule it for erasure. | |
837 | + * have been changed, schedule it for erasure. | |
912 | 838 | */ |
913 | 839 | out_not_moved: |
840 | + dbg_wl("canceled moving PEB %d", e1->pnum); | |
914 | 841 | ubi_free_vid_hdr(ubi, vid_hdr); |
842 | + vid_hdr = NULL; | |
915 | 843 | spin_lock(&ubi->wl_lock); |
916 | 844 | if (scrubbing) |
917 | 845 | wl_tree_add(e1, &ubi->scrub); |
918 | 846 | else |
919 | 847 | wl_tree_add(e1, &ubi->used); |
848 | + ubi_assert(!ubi->move_to_put); | |
920 | 849 | ubi->move_from = ubi->move_to = NULL; |
921 | - ubi->move_to_put = ubi->wl_scheduled = 0; | |
850 | + ubi->wl_scheduled = 0; | |
922 | 851 | spin_unlock(&ubi->wl_lock); |
923 | 852 | |
924 | - err = schedule_erase(ubi, e2, 0); | |
853 | + e1 = NULL; | |
854 | + err = schedule_erase(ubi, e2, torture); | |
925 | 855 | if (err) |
926 | 856 | goto out_error; |
927 | 857 | |
... | ... | @@ -938,8 +868,10 @@ |
938 | 868 | ubi->move_to_put = ubi->wl_scheduled = 0; |
939 | 869 | spin_unlock(&ubi->wl_lock); |
940 | 870 | |
941 | - kmem_cache_free(ubi_wl_entry_slab, e1); | |
942 | - kmem_cache_free(ubi_wl_entry_slab, e2); | |
871 | + if (e1) | |
872 | + kmem_cache_free(ubi_wl_entry_slab, e1); | |
873 | + if (e2) | |
874 | + kmem_cache_free(ubi_wl_entry_slab, e2); | |
943 | 875 | ubi_ro_mode(ubi); |
944 | 876 | |
945 | 877 | mutex_unlock(&ubi->move_mutex); |
... | ... | @@ -988,7 +920,7 @@ |
988 | 920 | * erase counter of free physical eraseblocks is greater then |
989 | 921 | * %UBI_WL_THRESHOLD. |
990 | 922 | */ |
991 | - e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, rb); | |
923 | + e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb); | |
992 | 924 | e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); |
993 | 925 | |
994 | 926 | if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) |
... | ... | @@ -1050,7 +982,6 @@ |
1050 | 982 | kfree(wl_wrk); |
1051 | 983 | |
1052 | 984 | spin_lock(&ubi->wl_lock); |
1053 | - ubi->abs_ec += 1; | |
1054 | 985 | wl_tree_add(e, &ubi->free); |
1055 | 986 | spin_unlock(&ubi->wl_lock); |
1056 | 987 | |
... | ... | @@ -1058,7 +989,7 @@ |
1058 | 989 | * One more erase operation has happened, take care about |
1059 | 990 | * protected physical eraseblocks. |
1060 | 991 | */ |
1061 | - check_protection_over(ubi); | |
992 | + serve_prot_queue(ubi); | |
1062 | 993 | |
1063 | 994 | /* And take care about wear-leveling */ |
1064 | 995 | err = ensure_wear_leveling(ubi); |
1065 | 996 | |
... | ... | @@ -1190,12 +1121,12 @@ |
1190 | 1121 | } else { |
1191 | 1122 | if (in_wl_tree(e, &ubi->used)) { |
1192 | 1123 | paranoid_check_in_wl_tree(e, &ubi->used); |
1193 | - rb_erase(&e->rb, &ubi->used); | |
1124 | + rb_erase(&e->u.rb, &ubi->used); | |
1194 | 1125 | } else if (in_wl_tree(e, &ubi->scrub)) { |
1195 | 1126 | paranoid_check_in_wl_tree(e, &ubi->scrub); |
1196 | - rb_erase(&e->rb, &ubi->scrub); | |
1127 | + rb_erase(&e->u.rb, &ubi->scrub); | |
1197 | 1128 | } else { |
1198 | - err = prot_tree_del(ubi, e->pnum); | |
1129 | + err = prot_queue_del(ubi, e->pnum); | |
1199 | 1130 | if (err) { |
1200 | 1131 | ubi_err("PEB %d not found", pnum); |
1201 | 1132 | ubi_ro_mode(ubi); |
1202 | 1133 | |
... | ... | @@ -1255,11 +1186,11 @@ |
1255 | 1186 | |
1256 | 1187 | if (in_wl_tree(e, &ubi->used)) { |
1257 | 1188 | paranoid_check_in_wl_tree(e, &ubi->used); |
1258 | - rb_erase(&e->rb, &ubi->used); | |
1189 | + rb_erase(&e->u.rb, &ubi->used); | |
1259 | 1190 | } else { |
1260 | 1191 | int err; |
1261 | 1192 | |
1262 | - err = prot_tree_del(ubi, e->pnum); | |
1193 | + err = prot_queue_del(ubi, e->pnum); | |
1263 | 1194 | if (err) { |
1264 | 1195 | ubi_err("PEB %d not found", pnum); |
1265 | 1196 | ubi_ro_mode(ubi); |
... | ... | @@ -1290,7 +1221,7 @@ |
1290 | 1221 | int err; |
1291 | 1222 | |
1292 | 1223 | /* |
1293 | - * Erase while the pending works queue is not empty, but not more then | |
1224 | + * Erase while the pending works queue is not empty, but not more than | |
1294 | 1225 | * the number of currently pending works. |
1295 | 1226 | */ |
1296 | 1227 | dbg_wl("flush (%d pending works)", ubi->works_count); |
... | ... | @@ -1308,7 +1239,7 @@ |
1308 | 1239 | up_write(&ubi->work_sem); |
1309 | 1240 | |
1310 | 1241 | /* |
1311 | - * And in case last was the WL worker and it cancelled the LEB | |
1242 | + * And in case last was the WL worker and it canceled the LEB | |
1312 | 1243 | * movement, flush again. |
1313 | 1244 | */ |
1314 | 1245 | while (ubi->works_count) { |
1315 | 1246 | |
... | ... | @@ -1337,11 +1268,11 @@ |
1337 | 1268 | else if (rb->rb_right) |
1338 | 1269 | rb = rb->rb_right; |
1339 | 1270 | else { |
1340 | - e = rb_entry(rb, struct ubi_wl_entry, rb); | |
1271 | + e = rb_entry(rb, struct ubi_wl_entry, u.rb); | |
1341 | 1272 | |
1342 | 1273 | rb = rb_parent(rb); |
1343 | 1274 | if (rb) { |
1344 | - if (rb->rb_left == &e->rb) | |
1275 | + if (rb->rb_left == &e->u.rb) | |
1345 | 1276 | rb->rb_left = NULL; |
1346 | 1277 | else |
1347 | 1278 | rb->rb_right = NULL; |
1348 | 1279 | |
... | ... | @@ -1436,15 +1367,13 @@ |
1436 | 1367 | */ |
1437 | 1368 | int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si) |
1438 | 1369 | { |
1439 | - int err; | |
1370 | + int err, i; | |
1440 | 1371 | struct rb_node *rb1, *rb2; |
1441 | 1372 | struct ubi_scan_volume *sv; |
1442 | 1373 | struct ubi_scan_leb *seb, *tmp; |
1443 | 1374 | struct ubi_wl_entry *e; |
1444 | 1375 | |
1445 | - | |
1446 | 1376 | ubi->used = ubi->free = ubi->scrub = RB_ROOT; |
1447 | - ubi->prot.pnum = ubi->prot.aec = RB_ROOT; | |
1448 | 1377 | spin_lock_init(&ubi->wl_lock); |
1449 | 1378 | mutex_init(&ubi->move_mutex); |
1450 | 1379 | init_rwsem(&ubi->work_sem); |
... | ... | @@ -1458,6 +1387,10 @@ |
1458 | 1387 | if (!ubi->lookuptbl) |
1459 | 1388 | return err; |
1460 | 1389 | |
1390 | + for (i = 0; i < UBI_PROT_QUEUE_LEN; i++) | |
1391 | + INIT_LIST_HEAD(&ubi->pq[i]); | |
1392 | + ubi->pq_head = 0; | |
1393 | + | |
1461 | 1394 | list_for_each_entry_safe(seb, tmp, &si->erase, u.list) { |
1462 | 1395 | cond_resched(); |
1463 | 1396 | |
... | ... | @@ -1552,33 +1485,18 @@ |
1552 | 1485 | } |
1553 | 1486 | |
1554 | 1487 | /** |
1555 | - * protection_trees_destroy - destroy the protection RB-trees. | |
1488 | + * protection_queue_destroy - destroy the protection queue. | |
1556 | 1489 | * @ubi: UBI device description object |
1557 | 1490 | */ |
1558 | -static void protection_trees_destroy(struct ubi_device *ubi) | |
1491 | +static void protection_queue_destroy(struct ubi_device *ubi) | |
1559 | 1492 | { |
1560 | - struct rb_node *rb; | |
1561 | - struct ubi_wl_prot_entry *pe; | |
1493 | + int i; | |
1494 | + struct ubi_wl_entry *e, *tmp; | |
1562 | 1495 | |
1563 | - rb = ubi->prot.aec.rb_node; | |
1564 | - while (rb) { | |
1565 | - if (rb->rb_left) | |
1566 | - rb = rb->rb_left; | |
1567 | - else if (rb->rb_right) | |
1568 | - rb = rb->rb_right; | |
1569 | - else { | |
1570 | - pe = rb_entry(rb, struct ubi_wl_prot_entry, rb_aec); | |
1571 | - | |
1572 | - rb = rb_parent(rb); | |
1573 | - if (rb) { | |
1574 | - if (rb->rb_left == &pe->rb_aec) | |
1575 | - rb->rb_left = NULL; | |
1576 | - else | |
1577 | - rb->rb_right = NULL; | |
1578 | - } | |
1579 | - | |
1580 | - kmem_cache_free(ubi_wl_entry_slab, pe->e); | |
1581 | - kfree(pe); | |
1496 | + for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i) { | |
1497 | + list_for_each_entry_safe(e, tmp, &ubi->pq[i], u.list) { | |
1498 | + list_del(&e->u.list); | |
1499 | + kmem_cache_free(ubi_wl_entry_slab, e); | |
1582 | 1500 | } |
1583 | 1501 | } |
1584 | 1502 | } |
... | ... | @@ -1591,7 +1509,7 @@ |
1591 | 1509 | { |
1592 | 1510 | dbg_wl("close the WL sub-system"); |
1593 | 1511 | cancel_pending(ubi); |
1594 | - protection_trees_destroy(ubi); | |
1512 | + protection_queue_destroy(ubi); | |
1595 | 1513 | tree_destroy(&ubi->used); |
1596 | 1514 | tree_destroy(&ubi->free); |
1597 | 1515 | tree_destroy(&ubi->scrub); |
... | ... | @@ -1661,5 +1579,28 @@ |
1661 | 1579 | return 1; |
1662 | 1580 | } |
1663 | 1581 | |
1582 | +/** | |
1583 | + * paranoid_check_in_pq - check if wear-leveling entry is in the protection | |
1584 | + * queue. | |
1585 | + * @ubi: UBI device description object | |
1586 | + * @e: the wear-leveling entry to check | |
1587 | + * | |
1588 | + * This function returns zero if @e is in @ubi->pq and %1 if it is not. | |
1589 | + */ | |
1590 | +static int paranoid_check_in_pq(struct ubi_device *ubi, struct ubi_wl_entry *e) | |
1591 | +{ | |
1592 | + struct ubi_wl_entry *p; | |
1593 | + int i; | |
1594 | + | |
1595 | + for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i) | |
1596 | + list_for_each_entry(p, &ubi->pq[i], u.list) | |
1597 | + if (p == e) | |
1598 | + return 0; | |
1599 | + | |
1600 | + ubi_err("paranoid check failed for PEB %d, EC %d, Protect queue", | |
1601 | + e->pnum, e->ec); | |
1602 | + ubi_dbg_dump_stack(); | |
1603 | + return 1; | |
1604 | +} | |
1664 | 1605 | #endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */ |