Commit bd870a16c5946d86126f7203db3c73b71de0a1d8

Authored by NeilBrown
1 parent 749c55e942

md/raid10: Handle write errors by updating badblock log.

When we get a write error (in the data area, not in metadata),
update the badblock log rather than failing the whole device.

As the write may well span many blocks, we try writing each
block individually and only log the ones which fail.

Signed-off-by: NeilBrown <neilb@suse.de>

Showing 2 changed files with 117 additions and 17 deletions Side-by-side Diff

... ... @@ -327,6 +327,16 @@
327 327 }
328 328 }
329 329  
  330 +static void close_write(r10bio_t *r10_bio)
  331 +{
  332 + /* clear the bitmap if all writes complete successfully */
  333 + bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector,
  334 + r10_bio->sectors,
  335 + !test_bit(R10BIO_Degraded, &r10_bio->state),
  336 + 0);
  337 + md_write_end(r10_bio->mddev);
  338 +}
  339 +
330 340 static void raid10_end_write_request(struct bio *bio, int error)
331 341 {
332 342 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
... ... @@ -342,9 +352,9 @@
342 352 * this branch is our 'one mirror IO has finished' event handler:
343 353 */
344 354 if (!uptodate) {
345   - md_error(r10_bio->mddev, conf->mirrors[dev].rdev);
346   - /* an I/O failed, we can't clear the bitmap */
347   - set_bit(R10BIO_Degraded, &r10_bio->state);
  355 + set_bit(WriteErrorSeen, &conf->mirrors[dev].rdev->flags);
  356 + set_bit(R10BIO_WriteError, &r10_bio->state);
  357 + dec_rdev = 0;
348 358 } else {
349 359 /*
350 360 * Set R10BIO_Uptodate in our master bio, so that
351 361  
... ... @@ -378,16 +388,15 @@
378 388 * already.
379 389 */
380 390 if (atomic_dec_and_test(&r10_bio->remaining)) {
381   - /* clear the bitmap if all writes complete successfully */
382   - bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector,
383   - r10_bio->sectors,
384   - !test_bit(R10BIO_Degraded, &r10_bio->state),
385   - 0);
386   - md_write_end(r10_bio->mddev);
387   - if (test_bit(R10BIO_MadeGood, &r10_bio->state))
  391 + if (test_bit(R10BIO_WriteError, &r10_bio->state))
388 392 reschedule_retry(r10_bio);
389   - else
390   - raid_end_bio_io(r10_bio);
  393 + else {
  394 + close_write(r10_bio);
  395 + if (test_bit(R10BIO_MadeGood, &r10_bio->state))
  396 + reschedule_retry(r10_bio);
  397 + else
  398 + raid_end_bio_io(r10_bio);
  399 + }
391 400 }
392 401 if (dec_rdev)
393 402 rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
... ... @@ -1839,6 +1848,82 @@
1839 1848 }
1840 1849 }
1841 1850  
  1851 +static void bi_complete(struct bio *bio, int error)
  1852 +{
  1853 + complete((struct completion *)bio->bi_private);
  1854 +}
  1855 +
  1856 +static int submit_bio_wait(int rw, struct bio *bio)
  1857 +{
  1858 + struct completion event;
  1859 + rw |= REQ_SYNC;
  1860 +
  1861 + init_completion(&event);
  1862 + bio->bi_private = &event;
  1863 + bio->bi_end_io = bi_complete;
  1864 + submit_bio(rw, bio);
  1865 + wait_for_completion(&event);
  1866 +
  1867 + return test_bit(BIO_UPTODATE, &bio->bi_flags);
  1868 +}
  1869 +
  1870 +static int narrow_write_error(r10bio_t *r10_bio, int i)
  1871 +{
  1872 + struct bio *bio = r10_bio->master_bio;
  1873 + mddev_t *mddev = r10_bio->mddev;
  1874 + conf_t *conf = mddev->private;
  1875 + mdk_rdev_t *rdev = conf->mirrors[r10_bio->devs[i].devnum].rdev;
  1876 + /* bio has the data to be written to slot 'i' where
  1877 + * we just recently had a write error.
  1878 + * We repeatedly clone the bio and trim down to one block,
  1879 + * then try the write. Where the write fails we record
  1880 + * a bad block.
  1881 + * It is conceivable that the bio doesn't exactly align with
  1882 + * blocks. We must handle this.
  1883 + *
  1884 + * We currently own a reference to the rdev.
  1885 + */
  1886 +
  1887 + int block_sectors;
  1888 + sector_t sector;
  1889 + int sectors;
  1890 + int sect_to_write = r10_bio->sectors;
  1891 + int ok = 1;
  1892 +
  1893 + if (rdev->badblocks.shift < 0)
  1894 + return 0;
  1895 +
  1896 + block_sectors = 1 << rdev->badblocks.shift;
  1897 + sector = r10_bio->sector;
  1898 + sectors = ((r10_bio->sector + block_sectors)
  1899 + & ~(sector_t)(block_sectors - 1))
  1900 + - sector;
  1901 +
  1902 + while (sect_to_write) {
  1903 + struct bio *wbio;
  1904 + if (sectors > sect_to_write)
  1905 + sectors = sect_to_write;
  1906 + /* Write at 'sector' for 'sectors' */
  1907 + wbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
  1908 + md_trim_bio(wbio, sector - bio->bi_sector, sectors);
  1909 + wbio->bi_sector = (r10_bio->devs[i].addr+
  1910 + rdev->data_offset+
  1911 + (sector - r10_bio->sector));
  1912 + wbio->bi_bdev = rdev->bdev;
  1913 + if (submit_bio_wait(WRITE, wbio) == 0)
  1914 + /* Failure! */
  1915 + ok = rdev_set_badblocks(rdev, sector,
  1916 + sectors, 0)
  1917 + && ok;
  1918 +
  1919 + bio_put(wbio);
  1920 + sect_to_write -= sectors;
  1921 + sector += sectors;
  1922 + sectors = block_sectors;
  1923 + }
  1924 + return ok;
  1925 +}
  1926 +
1842 1927 static void handle_read_error(mddev_t *mddev, r10bio_t *r10_bio)
1843 1928 {
1844 1929 int slot = r10_bio->read_slot;
1845 1930  
1846 1931  
... ... @@ -1962,16 +2047,29 @@
1962 2047 }
1963 2048 put_buf(r10_bio);
1964 2049 } else {
1965   - for (m = 0; m < conf->copies; m++)
1966   - if (r10_bio->devs[m].bio == IO_MADE_GOOD) {
1967   - int dev = r10_bio->devs[m].devnum;
1968   - rdev = conf->mirrors[dev].rdev;
  2050 + for (m = 0; m < conf->copies; m++) {
  2051 + int dev = r10_bio->devs[m].devnum;
  2052 + struct bio *bio = r10_bio->devs[m].bio;
  2053 + rdev = conf->mirrors[dev].rdev;
  2054 + if (bio == IO_MADE_GOOD) {
1969 2055 rdev_clear_badblocks(
1970 2056 rdev,
1971 2057 r10_bio->devs[m].addr,
1972 2058 r10_bio->sectors);
1973 2059 rdev_dec_pending(rdev, conf->mddev);
  2060 + } else if (bio != NULL &&
  2061 + !test_bit(BIO_UPTODATE, &bio->bi_flags)) {
  2062 + if (!narrow_write_error(r10_bio, m)) {
  2063 + md_error(conf->mddev, rdev);
  2064 + set_bit(R10BIO_Degraded,
  2065 + &r10_bio->state);
  2066 + }
  2067 + rdev_dec_pending(rdev, conf->mddev);
1974 2068 }
  2069 + }
  2070 + if (test_bit(R10BIO_WriteError,
  2071 + &r10_bio->state))
  2072 + close_write(r10_bio);
1975 2073 raid_end_bio_io(r10_bio);
1976 2074 }
1977 2075 }
... ... @@ -2003,7 +2101,8 @@
2003 2101  
2004 2102 mddev = r10_bio->mddev;
2005 2103 conf = mddev->private;
2006   - if (test_bit(R10BIO_MadeGood, &r10_bio->state))
  2104 + if (test_bit(R10BIO_MadeGood, &r10_bio->state) ||
  2105 + test_bit(R10BIO_WriteError, &r10_bio->state))
2007 2106 handle_write_completed(conf, r10_bio);
2008 2107 else if (test_bit(R10BIO_IsSync, &r10_bio->state))
2009 2108 sync_request_write(mddev, r10_bio);
... ... @@ -139,5 +139,6 @@
139 139 * known-bad-block records, we set this flag.
140 140 */
141 141 #define R10BIO_MadeGood 5
  142 +#define R10BIO_WriteError 6
142 143 #endif