13 Jun, 2018

1 commit

  • The kmalloc() function has a 2-factor argument form, kmalloc_array(). This
    patch replaces cases of:

    kmalloc(a * b, gfp)

    with:
    kmalloc_array(a, b, gfp)

    as well as handling cases of:

    kmalloc(a * b * c, gfp)

    with:

    kmalloc(array3_size(a, b, c), gfp)

    as it's slightly less ugly than:

    kmalloc_array(array_size(a, b), c, gfp)

    This does, however, attempt to ignore constant size factors like:

    kmalloc(4 * 1024, gfp)

    though any constants defined via macros get caught up in the conversion.

    Any factors with a sizeof() of "unsigned char", "char", and "u8" were
    dropped, since they're redundant.

    The tools/ directory was manually excluded, since it has its own
    implementation of kmalloc().

    The Coccinelle script used for this was:

    // Fix redundant parens around sizeof().
    @@
    type TYPE;
    expression THING, E;
    @@

    (
    kmalloc(
    - (sizeof(TYPE)) * E
    + sizeof(TYPE) * E
    , ...)
    |
    kmalloc(
    - (sizeof(THING)) * E
    + sizeof(THING) * E
    , ...)
    )

    // Drop single-byte sizes and redundant parens.
    @@
    expression COUNT;
    typedef u8;
    typedef __u8;
    @@

    (
    kmalloc(
    - sizeof(u8) * (COUNT)
    + COUNT
    , ...)
    |
    kmalloc(
    - sizeof(__u8) * (COUNT)
    + COUNT
    , ...)
    |
    kmalloc(
    - sizeof(char) * (COUNT)
    + COUNT
    , ...)
    |
    kmalloc(
    - sizeof(unsigned char) * (COUNT)
    + COUNT
    , ...)
    |
    kmalloc(
    - sizeof(u8) * COUNT
    + COUNT
    , ...)
    |
    kmalloc(
    - sizeof(__u8) * COUNT
    + COUNT
    , ...)
    |
    kmalloc(
    - sizeof(char) * COUNT
    + COUNT
    , ...)
    |
    kmalloc(
    - sizeof(unsigned char) * COUNT
    + COUNT
    , ...)
    )

    // 2-factor product with sizeof(type/expression) and identifier or constant.
    @@
    type TYPE;
    expression THING;
    identifier COUNT_ID;
    constant COUNT_CONST;
    @@

    (
    - kmalloc
    + kmalloc_array
    (
    - sizeof(TYPE) * (COUNT_ID)
    + COUNT_ID, sizeof(TYPE)
    , ...)
    |
    - kmalloc
    + kmalloc_array
    (
    - sizeof(TYPE) * COUNT_ID
    + COUNT_ID, sizeof(TYPE)
    , ...)
    |
    - kmalloc
    + kmalloc_array
    (
    - sizeof(TYPE) * (COUNT_CONST)
    + COUNT_CONST, sizeof(TYPE)
    , ...)
    |
    - kmalloc
    + kmalloc_array
    (
    - sizeof(TYPE) * COUNT_CONST
    + COUNT_CONST, sizeof(TYPE)
    , ...)
    |
    - kmalloc
    + kmalloc_array
    (
    - sizeof(THING) * (COUNT_ID)
    + COUNT_ID, sizeof(THING)
    , ...)
    |
    - kmalloc
    + kmalloc_array
    (
    - sizeof(THING) * COUNT_ID
    + COUNT_ID, sizeof(THING)
    , ...)
    |
    - kmalloc
    + kmalloc_array
    (
    - sizeof(THING) * (COUNT_CONST)
    + COUNT_CONST, sizeof(THING)
    , ...)
    |
    - kmalloc
    + kmalloc_array
    (
    - sizeof(THING) * COUNT_CONST
    + COUNT_CONST, sizeof(THING)
    , ...)
    )

    // 2-factor product, only identifiers.
    @@
    identifier SIZE, COUNT;
    @@

    - kmalloc
    + kmalloc_array
    (
    - SIZE * COUNT
    + COUNT, SIZE
    , ...)

    // 3-factor product with 1 sizeof(type) or sizeof(expression), with
    // redundant parens removed.
    @@
    expression THING;
    identifier STRIDE, COUNT;
    type TYPE;
    @@

    (
    kmalloc(
    - sizeof(TYPE) * (COUNT) * (STRIDE)
    + array3_size(COUNT, STRIDE, sizeof(TYPE))
    , ...)
    |
    kmalloc(
    - sizeof(TYPE) * (COUNT) * STRIDE
    + array3_size(COUNT, STRIDE, sizeof(TYPE))
    , ...)
    |
    kmalloc(
    - sizeof(TYPE) * COUNT * (STRIDE)
    + array3_size(COUNT, STRIDE, sizeof(TYPE))
    , ...)
    |
    kmalloc(
    - sizeof(TYPE) * COUNT * STRIDE
    + array3_size(COUNT, STRIDE, sizeof(TYPE))
    , ...)
    |
    kmalloc(
    - sizeof(THING) * (COUNT) * (STRIDE)
    + array3_size(COUNT, STRIDE, sizeof(THING))
    , ...)
    |
    kmalloc(
    - sizeof(THING) * (COUNT) * STRIDE
    + array3_size(COUNT, STRIDE, sizeof(THING))
    , ...)
    |
    kmalloc(
    - sizeof(THING) * COUNT * (STRIDE)
    + array3_size(COUNT, STRIDE, sizeof(THING))
    , ...)
    |
    kmalloc(
    - sizeof(THING) * COUNT * STRIDE
    + array3_size(COUNT, STRIDE, sizeof(THING))
    , ...)
    )

    // 3-factor product with 2 sizeof(variable), with redundant parens removed.
    @@
    expression THING1, THING2;
    identifier COUNT;
    type TYPE1, TYPE2;
    @@

    (
    kmalloc(
    - sizeof(TYPE1) * sizeof(TYPE2) * COUNT
    + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
    , ...)
    |
    kmalloc(
    - sizeof(TYPE1) * sizeof(TYPE2) * (COUNT)
    + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
    , ...)
    |
    kmalloc(
    - sizeof(THING1) * sizeof(THING2) * COUNT
    + array3_size(COUNT, sizeof(THING1), sizeof(THING2))
    , ...)
    |
    kmalloc(
    - sizeof(THING1) * sizeof(THING2) * (COUNT)
    + array3_size(COUNT, sizeof(THING1), sizeof(THING2))
    , ...)
    |
    kmalloc(
    - sizeof(TYPE1) * sizeof(THING2) * COUNT
    + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
    , ...)
    |
    kmalloc(
    - sizeof(TYPE1) * sizeof(THING2) * (COUNT)
    + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
    , ...)
    )

    // 3-factor product, only identifiers, with redundant parens removed.
    @@
    identifier STRIDE, SIZE, COUNT;
    @@

    (
    kmalloc(
    - (COUNT) * STRIDE * SIZE
    + array3_size(COUNT, STRIDE, SIZE)
    , ...)
    |
    kmalloc(
    - COUNT * (STRIDE) * SIZE
    + array3_size(COUNT, STRIDE, SIZE)
    , ...)
    |
    kmalloc(
    - COUNT * STRIDE * (SIZE)
    + array3_size(COUNT, STRIDE, SIZE)
    , ...)
    |
    kmalloc(
    - (COUNT) * (STRIDE) * SIZE
    + array3_size(COUNT, STRIDE, SIZE)
    , ...)
    |
    kmalloc(
    - COUNT * (STRIDE) * (SIZE)
    + array3_size(COUNT, STRIDE, SIZE)
    , ...)
    |
    kmalloc(
    - (COUNT) * STRIDE * (SIZE)
    + array3_size(COUNT, STRIDE, SIZE)
    , ...)
    |
    kmalloc(
    - (COUNT) * (STRIDE) * (SIZE)
    + array3_size(COUNT, STRIDE, SIZE)
    , ...)
    |
    kmalloc(
    - COUNT * STRIDE * SIZE
    + array3_size(COUNT, STRIDE, SIZE)
    , ...)
    )

    // Any remaining multi-factor products, first at least 3-factor products,
    // when they're not all constants...
    @@
    expression E1, E2, E3;
    constant C1, C2, C3;
    @@

    (
    kmalloc(C1 * C2 * C3, ...)
    |
    kmalloc(
    - (E1) * E2 * E3
    + array3_size(E1, E2, E3)
    , ...)
    |
    kmalloc(
    - (E1) * (E2) * E3
    + array3_size(E1, E2, E3)
    , ...)
    |
    kmalloc(
    - (E1) * (E2) * (E3)
    + array3_size(E1, E2, E3)
    , ...)
    |
    kmalloc(
    - E1 * E2 * E3
    + array3_size(E1, E2, E3)
    , ...)
    )

    // And then all remaining 2 factors products when they're not all constants,
    // keeping sizeof() as the second factor argument.
    @@
    expression THING, E1, E2;
    type TYPE;
    constant C1, C2, C3;
    @@

    (
    kmalloc(sizeof(THING) * C2, ...)
    |
    kmalloc(sizeof(TYPE) * C2, ...)
    |
    kmalloc(C1 * C2 * C3, ...)
    |
    kmalloc(C1 * C2, ...)
    |
    - kmalloc
    + kmalloc_array
    (
    - sizeof(TYPE) * (E2)
    + E2, sizeof(TYPE)
    , ...)
    |
    - kmalloc
    + kmalloc_array
    (
    - sizeof(TYPE) * E2
    + E2, sizeof(TYPE)
    , ...)
    |
    - kmalloc
    + kmalloc_array
    (
    - sizeof(THING) * (E2)
    + E2, sizeof(THING)
    , ...)
    |
    - kmalloc
    + kmalloc_array
    (
    - sizeof(THING) * E2
    + E2, sizeof(THING)
    , ...)
    |
    - kmalloc
    + kmalloc_array
    (
    - (E1) * E2
    + E1, E2
    , ...)
    |
    - kmalloc
    + kmalloc_array
    (
    - (E1) * (E2)
    + E1, E2
    , ...)
    |
    - kmalloc
    + kmalloc_array
    (
    - E1 * E2
    + E1, E2
    , ...)
    )

    Signed-off-by: Kees Cook

    Kees Cook
     

06 Jun, 2018

1 commit

  • Pull xfs updates from Darrick Wong:
    "New features this cycle include the ability to relabel mounted
    filesystems, support for fallocated swapfiles, and using FUA for pure
    data O_DSYNC directio writes. With this cycle we begin to integrate
    online filesystem repair and refactor the growfs code in preparation
    for eventual subvolume support, though the road ahead for both
    features is quite long.

    There are also numerous refactorings of the iomap code to remove
    unnecessary log overhead, to disentangle some of the quota code, and
    to prepare for buffer head removal in a future upstream kernel.

    Metadata validation continues to improve, both in the hot path
    verifiers and the online filesystem check code. I anticipate sending a
    second pull request in a few days with more metadata validation
    improvements.

    This series has been run through a full xfstests run over the weekend
    and through a quick xfstests run against this morning's master, with
    no major failures reported.

    Summary:

    - Strengthen inode number and structure validation when allocating
    inodes.

    - Reduce pointless buffer allocations during cache miss

    - Use FUA for pure data O_DSYNC directio writes

    - Various iomap refactorings

    - Strengthen quota metadata verification to avoid unfixable broken
    quota

    - Make AGFL block freeing a deferred operation to avoid blowing out
    transaction reservations when running complex operations

    - Get rid of the log item descriptors to reduce log overhead

    - Fix various reflink bugs where inodes were double-joined to
    transactions

    - Don't issue discards when trimming unwritten extents

    - Refactor incore dquot initialization and retrieval interfaces

    - Fix some locking problems in the quota scrub code

    - Strengthen btree structure checks in scrub code

    - Rewrite swapfile activation to use iomap and support unwritten
    extents

    - Make scrub exit to userspace sooner when corruptions or
    cross-referencing problems are found

    - Make scrub invoke the data fork scrubber directly on metadata
    inodes

    - Don't do background reclamation of post-eof and cow blocks when the
    fs is suspended

    - Fix secondary superblock buffer lifespan hinting

    - Refactor growfs to use table-dispatched functions instead of long
    stringy functions

    - Move growfs code to libxfs

    - Implement online fs label getting and setting

    - Introduce online filesystem repair (in a very limited capacity)

    - Fix unit conversion problems in the realtime freemap iteration
    functions

    - Various refactorings and cleanups in preparation to remove buffer
    heads in a future release

    - Reimplement the old bmap call with iomap

    - Remove direct buffer head accesses from seek hole/data

    - Various bug fixes"

    * tag 'xfs-4.18-merge-3' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (121 commits)
    fs: use ->is_partially_uptodate in page_cache_seek_hole_data
    fs: remove the buffer_unwritten check in page_seek_hole_data
    fs: move page_cache_seek_hole_data to iomap.c
    xfs: use iomap_bmap
    iomap: add an iomap-based bmap implementation
    iomap: add a iomap_sector helper
    iomap: use __bio_add_page in iomap_dio_zero
    iomap: move IOMAP_F_BOUNDARY to gfs2
    iomap: fix the comment describing IOMAP_NOWAIT
    iomap: inline data should be an iomap type, not a flag
    mm: split ->readpages calls to avoid non-contiguous pages lists
    mm: return an unsigned int from __do_page_cache_readahead
    mm: give the 'ret' variable a better name __do_page_cache_readahead
    block: add a lower-level bio_add_page interface
    xfs: fix error handling in xfs_refcount_insert()
    xfs: fix xfs_rtalloc_rec units
    xfs: strengthen rtalloc query range checks
    xfs: xfs_rtbuf_get should check the bmapi_read results
    xfs: xfs_rtword_t should be unsigned, not signed
    dax: change bdev_dax_supported() to support boolean returns
    ...

    Linus Torvalds
     

04 Jun, 2018

7 commits

  • Clean up gfs2_iomap_alloc and gfs2_iomap_get. Document how
    gfs2_iomap_alloc works: it now needs to be called separately after
    gfs2_iomap_get where necessary; this will be used later by iomap write.
    Move gfs2_iomap_ops into bmap.c.

    Introduce a new gfs2_iomap_get_alloc helper and use it in
    fallocate_chunk: gfs2_iomap_begin will become unsuitable for fallocate
    with proper iomap write support.

    In gfs2_block_map and fallocate_chunk, zero-initialize struct iomap.

    Signed-off-by: Andreas Gruenbacher
    Signed-off-by: Bob Peterson

    Andreas Gruenbacher
     
  • In journaled data mode, we need to add each buffer head to the current
    transaction. In ordered write mode, we only need to add the inode to
    the ordered inode list. So far, both cases are handled in
    gfs2_trans_add_data. This makes the code look misleading and is
    inefficient for small block sizes as well. Handle both cases separately
    instead.

    Signed-off-by: Andreas Gruenbacher
    Signed-off-by: Bob Peterson

    Andreas Gruenbacher
     
  • First, change the sanity check in gfs2_stuffed_write_end to check for
    the actual write size instead of the requested write size.

    Second, use the existing teardown code in gfs2_write_end instead of
    duplicating it in gfs2_stuffed_write_end.

    Signed-off-by: Andreas Gruenbacher
    Signed-off-by: Bob Peterson

    Andreas Gruenbacher
     
  • Reimplement function hole_size based on a generic function for walking
    the metadata tree and rename hole_size to gfs2_hole_size. While
    previously, multiple invocations of hole_size were sometimes needed to
    walk across the entire hole, the new implementation always returns the
    entire hole at once (provided that the caller is interested in the total
    size).

    Signed-off-by: Andreas Gruenbacher
    Signed-off-by: Bob Peterson

    Andreas Gruenbacher
     
  • Function gfs2_free_extlen calculates the length of an extent of
    free blocks that may be reserved. The end pointer was calculated as
    end = start + bh->b_size but b_size is incorrect because the
    bitmap usually stops prior to the end of the buffer data on
    the last bitmap.

    What this means is that when you do a write, you can reserve a
    chunk of blocks that runs off the end of the last bitmap. For
    example, I've got a file system where there is only one bitmap
    for each rgrp, so ri_length==1. I saw cases in which iozone
    tried to do a big write, grabbed a large block reservation,
    chose rgrp 5464152, which has ri_data0 5464153 and ri_data 8188.
    So 5464153 + 8188 = 5472341 which is the end of the rgrp.

    When it grabbed a reservation it got back: 5470936, length 7229.
    But 5470936 + 7229 = 5478165. So the reservation starts inside
    the rgrp but runs 5824 blocks past the end of the bitmap.

    This patch fixes the calculation so it won't exceed the last
    bitmap. It also adds a BUG_ON to guard against overflows in the
    future.

    Signed-off-by: Bob Peterson

    Bob Peterson
     
  • Before this patch function gfs2_write_begin, upon discovering an
    error, called gfs2_trim_blocks while the rgrp glock was still held.
    That's because gfs2_inplace_release is not called until later.
    This patch reorganizes the logic a bit so gfs2_inplace_release
    is called to release the lock prior to the call to gfs2_trim_blocks,
    thus preventing the glock recursion.

    Signed-off-by: Andreas Gruenbacher
    Signed-off-by: Bob Peterson

    Andreas Gruenbacher
     
  • Signed-off-by: Andreas Gruenbacher
    Signed-off-by: Bob Peterson

    Andreas Gruenbacher
     

02 Jun, 2018

2 commits


17 Apr, 2018

1 commit

  • GFS2 keeps two arrays in the superblock that define the maximum size of
    an inode depending on the inode's height: sdp->sd_heightsize defines the
    heights in units of sb->s_blocksize; sdp->sd_jheightsize defines them in
    units of sb->s_blocksize - sizeof(struct gfs2_meta_header). These
    arrays are used to determine when additional layers of indirect blocks
    are needed. The second array is used for directories which have an
    additional gfs2_meta_header at the beginning of each block.

    Distinguishing between these two cases makes no sense: the height
    required for representing N blocks will come out the same no matter if
    the calculation is done in gross (sb->s_blocksize) or net
    (sb->s_blocksize - sizeof(struct gfs2_meta_header)) units.

    Stuffed directories don't have an additional gfs2_meta_header, but the
    stuffed case is handled separately for both files and directories,
    anyway.

    Remove the unnecessary sdp->sd_jheightsize array.

    Signed-off-by: Andreas Gruenbacher
    Signed-off-by: Bob Peterson

    Andreas Gruenbacher
     

13 Apr, 2018

2 commits

  • This patch simply fixes some comments and the gfs2-glocks.txt file:
    Places where i_rwsem was called i_mutex, and adding i_rw_mutex.

    Signed-off-by: Bob Peterson

    Bob Peterson
     
  • Function rhashtable_walk_peek is problematic because there is no
    guarantee that the glock previously returned still exists; when that key
    is deleted, rhashtable_walk_peek can end up returning a different key,
    which will cause an inconsistent glock dump. Fix this by keeping track
    of the current glock in the seq file iterator functions instead.

    Signed-off-by: Andreas Gruenbacher
    Signed-off-by: Bob Peterson

    Andreas Gruenbacher
     

07 Apr, 2018

1 commit

  • Pull misc vfs updates from Al Viro:
    "Assorted stuff, including Christoph's I_DIRTY patches"

    * 'work.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
    fs: move I_DIRTY_INODE to fs.h
    ubifs: fix bogus __mark_inode_dirty(I_DIRTY_SYNC | I_DIRTY_DATASYNC) call
    ntfs: fix bogus __mark_inode_dirty(I_DIRTY_SYNC | I_DIRTY_DATASYNC) call
    gfs2: fix bogus __mark_inode_dirty(I_DIRTY_SYNC | I_DIRTY_DATASYNC) calls
    fs: fold open_check_o_direct into do_dentry_open
    vfs: Replace stray non-ASCII homoglyph characters with their ASCII equivalents
    vfs: make sure struct filename->iname is word-aligned
    get rid of pointless includes of fs_struct.h
    [poll] annotate SAA6588_CMD_POLL users

    Linus Torvalds
     

30 Mar, 2018

1 commit

  • This patch spits out the time taken by the various steps in the
    journal recovery process. Previously, the journal recovery time
    didn't account for finding the journal head in the log which takes
    up a significant portion of time.

    Signed-off-by: Abhi Das
    Signed-off-by: Bob Peterson

    Abhi Das
     

29 Mar, 2018

1 commit

  • Instead of zeroing out fallocated blocks in gfs2_iomap_alloc, zero them
    out in fallocate_chunk, much higher up the call stack. This gets rid of
    gfs2's abuse of the IOMAP_ZERO flag as well as the gfs2 specific zeronew
    buffer flag. I can't think of a reason why zeroing out the blocks in
    gfs2_iomap_alloc would have any benefits: there is no additional locking
    at that level that would add protection to the newly allocated blocks.

    While at it, change fallocate over from gfs2_block_map to gfs2_iomap_begin.

    Signed-off-by: Andreas Gruenbacher
    Signed-off-by: Bob Peterson
    Acked-by: Christoph Hellwig

    Andreas Gruenbacher
     

28 Mar, 2018

2 commits


24 Mar, 2018

1 commit

  • When punching a hole or truncating an inode down to a given size, also
    check if the truncate point / start of the hole is within the range we
    have metadata for. Otherwise, we can end up freeing blocks that
    shouldn't be freed, corrupting the inode, or crashing the machine when
    trying to punch a hole into the void.

    When growing an inode via truncate, we set the new size but we don't
    allocate additional levels of indirect blocks and grow the inode height.
    When shrinking that inode again, the new size may still point beyond the
    end of the inode's metadata.

    Fixes xfstest generic/476.

    Debugged-by: Bob Peterson
    Signed-off-by: Andreas Gruenbacher
    Signed-off-by: Bob Peterson

    Andreas Gruenbacher
     

15 Mar, 2018

1 commit


09 Mar, 2018

5 commits

  • Signed-off-by: Andreas Gruenbacher
    Signed-off-by: Bob Peterson

    Andreas Gruenbacher
     
  • Before this patch, GFS2 was setting the PageChecked flag for ordered
    write pages. This is unnecessary. The ext3 file system only does it
    for jdata, and it's only used in jdata circumstances. It only muddies
    the already murky waters of writing pages in the aops.

    Signed-off-by: Bob Peterson

    Bob Peterson
     
  • Function gfs2_remove_from_ail is only ever used from log.c, so there
    is no reason to declare it extern. This patch removes the extern and
    declares it static.

    Signed-off-by: Bob Peterson

    Bob Peterson
     
  • Mark the source inode dirty during a rename instead of just updating the
    underlying buffer head. Otherwise, fsync may find the inode clean and
    will then skip flushing the journal. A subsequent power failure will
    cause the rename to be lost. This happens in command sequences like:

    xfs_io -f -c 'pwrite 0 4096' -c 'fsync' foo
    mv foo bar
    xfs_io -c 'fsync' bar
    # power failure

    Fixes xfstests generic/322, generic/376.

    Signed-off-by: Andreas Gruenbacher
    Signed-off-by: Bob Peterson

    Andreas Gruenbacher
     
  • The chunk size of allocations in __gfs2_fallocate is calculated
    incorrectly. The size can collapse, causing __gfs2_fallocate to
    allocate one block at a time, which is very inefficient. This needs
    fixing in two places:

    In gfs2_quota_lock_check, always set ap->allowed to UINT_MAX to indicate
    that there is no quota limit. This fixes callers that rely on
    ap->allowed to be set even when quotas are off.

    In __gfs2_fallocate, reset max_blks to UINT_MAX in each iteration of the
    loop to make sure that allocation limits from one resource group won't
    spill over into another resource group.

    Signed-off-by: Andreas Gruenbacher
    Signed-off-by: Bob Peterson

    Andreas Gruenbacher
     

08 Mar, 2018

1 commit

  • It turns out that commit 3229c18c0d6b2 'Fixes to "Implement iomap for
    block_map"' introduced another bug in gfs2_iomap_begin that can cause
    gfs2_block_map to set bh->b_size of an actual buffer to 0. This can
    lead to arbitrary incorrect behavior including crashes or disk
    corruption. Revert the incorrect part of that commit.

    Signed-off-by: Andreas Gruenbacher
    Signed-off-by: Bob Peterson

    Andreas Gruenbacher
     

14 Feb, 2018

1 commit

  • It turns out that commit 3974320ca6 "Implement iomap for block_map"
    introduced a few bugs that trigger occasional failures with xfstest
    generic/476:

    In gfs2_iomap_begin, we jump to do_alloc when we determine that we are
    beyond the end of the allocated metadata (height > ip->i_height).
    There, we can end up calling hole_size with a metapath that doesn't
    match the current metadata tree, which doesn't make sense. After
    untangling the code at do_alloc, fix this by checking if the block we
    are looking for is within the range of allocated metadata.

    In addition, add a BUG() in case gfs2_iomap_begin is accidentally called
    for reading stuffed files: this is handled separately. Make sure we
    don't truncate iomap->length for reads beyond the end of the file; in
    that case, the entire range counts as a hole.

    Finally, revert to taking a bitmap write lock when doing allocations.
    It's unclear why that change didn't lead to any failures during testing.

    Signed-off-by: Andreas Gruenbacher
    Signed-off-by: Bob Peterson

    Andreas Gruenbacher
     

02 Feb, 2018

2 commits

  • Restore an optimization removed in commit 7f19449553 "Fix debugfs glocks
    dump": keep the glock hash table iterator active while the glock dump
    file is held open. This avoids having to rescan the hash table from the
    start for each read, with quadratically rising runtime.

    In addition, use rhashtable_walk_peek for resuming a glock dump at the
    current position: when a glock doesn't fit in the provided buffer
    anymore, the next read must revisit the same glock.

    Finally, also restart the dump from the first entry when we notice that
    the hash table has been resized in gfs2_glock_seq_start.

    Signed-off-by: Andreas Gruenbacher
    Signed-off-by: Bob Peterson

    Andreas Gruenbacher
     
  • Depend on LIBCRC32C which uses the crypto API to select the appropriate
    crc32c implementation. With the CRYPTO and CRYPTO_CRC32C dependencies,
    gfs2 would still need to use the crypto API directly like ext4 and btrfs
    do, which isn't necessary.

    Signed-off-by: Andreas Gruenbacher
    Signed-off-by: Bob Peterson

    Andreas Gruenbacher
     

01 Feb, 2018

1 commit

  • Pull networking updates from David Miller:

    1) Significantly shrink the core networking routing structures. Result
    of http://vger.kernel.org/~davem/seoul2017_netdev_keynote.pdf

    2) Add netdevsim driver for testing various offloads, from Jakub
    Kicinski.

    3) Support cross-chip FDB operations in DSA, from Vivien Didelot.

    4) Add a 2nd listener hash table for TCP, similar to what was done for
    UDP. From Martin KaFai Lau.

    5) Add eBPF based queue selection to tun, from Jason Wang.

    6) Lockless qdisc support, from John Fastabend.

    7) SCTP stream interleave support, from Xin Long.

    8) Smoother TCP receive autotuning, from Eric Dumazet.

    9) Lots of erspan tunneling enhancements, from William Tu.

    10) Add true function call support to BPF, from Alexei Starovoitov.

    11) Add explicit support for GRO HW offloading, from Michael Chan.

    12) Support extack generation in more netlink subsystems. From Alexander
    Aring, Quentin Monnet, and Jakub Kicinski.

    13) Add 1000BaseX, flow control, and EEE support to mvneta driver. From
    Russell King.

    14) Add flow table abstraction to netfilter, from Pablo Neira Ayuso.

    15) Many improvements and simplifications to the NFP driver bpf JIT,
    from Jakub Kicinski.

    16) Support for ipv6 non-equal cost multipath routing, from Ido
    Schimmel.

    17) Add resource abstraction to devlink, from Arkadi Sharshevsky.

    18) Packet scheduler classifier shared filter block support, from Jiri
    Pirko.

    19) Avoid locking in act_csum, from Davide Caratti.

    20) devinet_ioctl() simplifications from Al Viro.

    21) More TCP bpf improvements from Lawrence Brakmo.

    22) Add support for onlink ipv6 route flag, similar to ipv4, from David
    Ahern.

    * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1925 commits)
    tls: Add support for encryption using async offload accelerator
    ip6mr: fix stale iterator
    net/sched: kconfig: Remove blank help texts
    openvswitch: meter: Use 64-bit arithmetic instead of 32-bit
    tcp_nv: fix potential integer overflow in tcpnv_acked
    r8169: fix RTL8168EP take too long to complete driver initialization.
    qmi_wwan: Add support for Quectel EP06
    rtnetlink: enable IFLA_IF_NETNSID for RTM_NEWLINK
    ipmr: Fix ptrdiff_t print formatting
    ibmvnic: Wait for device response when changing MAC
    qlcnic: fix deadlock bug
    tcp: release sk_frag.page in tcp_disconnect
    ipv4: Get the address of interface correctly.
    net_sched: gen_estimator: fix lockdep splat
    net: macb: Handle HRESP error
    net/mlx5e: IPoIB, Fix copy-paste bug in flow steering refactoring
    ipv6: addrconf: break critical section in addrconf_verify_rtnl()
    ipv6: change route cache aging logic
    i40e/i40evf: Update DESC_NEEDED value to reflect larger value
    bnxt_en: cleanup DIM work on device shutdown
    ...

    Linus Torvalds
     

31 Jan, 2018

2 commits


30 Jan, 2018

1 commit

  • Before this patch, if function gfs2_unlink failed to get a valid
    transaction (for example, not enough journal blocks) it would go
    to label out_end_trans which did gfs2_trans_end. But if the
    trans_begin failed, there's no transaction to end, and trying to
    do so results in: kernel BUG at fs/gfs2/trans.c:117!

    This patch changes the goto so that it does not try to end a
    non-existent transaction.

    Signed-off-by: Bob Peterson

    Bob Peterson
     

26 Jan, 2018

1 commit


23 Jan, 2018

2 commits

  • This patch just adds the capability for GFS2 to track which function
    called gfs2_log_flush. This should make it easier to diagnose
    problems based on the sequence of events found in the journals.

    Signed-off-by: Bob Peterson
    Reviewed-by: Andreas Gruenbacher

    Bob Peterson
     
  • This patch adds a new structure called gfs2_log_header_v2 which is used
    to store expanded fields into previously unused areas of the log headers
    (i.e., this change is backwards compatible). Some of these are used for
    debug purposes so we can backtrack when problems occur. Others are
    reserved for future expansion.

    This patch is based on a prototype from Steve Whitehouse.

    Signed-off-by: Bob Peterson
    Signed-off-by: Andreas Gruenbacher

    Bob Peterson
     

22 Jan, 2018

1 commit

  • Get rid of gfs2_log_header_in by integrating it into get_log_header.
    Clean up the crc32 computations and use the same functions for encoding
    and decoding to make things less confusing. Eliminate lh_hash from
    gfs2_log_header_host which is completely useless.

    Signed-off-by: Andreas Gruenbacher
    Signed-off-by: Bob Peterson

    Andreas Gruenbacher
     

19 Jan, 2018

2 commits