13 Jun, 2018

1 commit

  • The kvmalloc() function has a 2-factor argument form, kvmalloc_array(). This
    patch replaces cases of:

    kvmalloc(a * b, gfp)

    with:
    kvmalloc_array(a, b, gfp)

    as well as handling cases of:

    kvmalloc(a * b * c, gfp)

    with:

    kvmalloc(array3_size(a, b, c), gfp)

    as it's slightly less ugly than:

    kvmalloc_array(array_size(a, b), c, gfp)

    This does, however, attempt to ignore constant size factors like:

    kvmalloc(4 * 1024, gfp)

    though any constants defined via macros get caught up in the conversion.

    Any factors with a sizeof() of "unsigned char", "char", and "u8" were
    dropped, since they're redundant.

    The Coccinelle script used for this was:

    // Fix redundant parens around sizeof().
    @@
    type TYPE;
    expression THING, E;
    @@

    (
    kvmalloc(
    - (sizeof(TYPE)) * E
    + sizeof(TYPE) * E
    , ...)
    |
    kvmalloc(
    - (sizeof(THING)) * E
    + sizeof(THING) * E
    , ...)
    )

    // Drop single-byte sizes and redundant parens.
    @@
    expression COUNT;
    typedef u8;
    typedef __u8;
    @@

    (
    kvmalloc(
    - sizeof(u8) * (COUNT)
    + COUNT
    , ...)
    |
    kvmalloc(
    - sizeof(__u8) * (COUNT)
    + COUNT
    , ...)
    |
    kvmalloc(
    - sizeof(char) * (COUNT)
    + COUNT
    , ...)
    |
    kvmalloc(
    - sizeof(unsigned char) * (COUNT)
    + COUNT
    , ...)
    |
    kvmalloc(
    - sizeof(u8) * COUNT
    + COUNT
    , ...)
    |
    kvmalloc(
    - sizeof(__u8) * COUNT
    + COUNT
    , ...)
    |
    kvmalloc(
    - sizeof(char) * COUNT
    + COUNT
    , ...)
    |
    kvmalloc(
    - sizeof(unsigned char) * COUNT
    + COUNT
    , ...)
    )

    // 2-factor product with sizeof(type/expression) and identifier or constant.
    @@
    type TYPE;
    expression THING;
    identifier COUNT_ID;
    constant COUNT_CONST;
    @@

    (
    - kvmalloc
    + kvmalloc_array
    (
    - sizeof(TYPE) * (COUNT_ID)
    + COUNT_ID, sizeof(TYPE)
    , ...)
    |
    - kvmalloc
    + kvmalloc_array
    (
    - sizeof(TYPE) * COUNT_ID
    + COUNT_ID, sizeof(TYPE)
    , ...)
    |
    - kvmalloc
    + kvmalloc_array
    (
    - sizeof(TYPE) * (COUNT_CONST)
    + COUNT_CONST, sizeof(TYPE)
    , ...)
    |
    - kvmalloc
    + kvmalloc_array
    (
    - sizeof(TYPE) * COUNT_CONST
    + COUNT_CONST, sizeof(TYPE)
    , ...)
    |
    - kvmalloc
    + kvmalloc_array
    (
    - sizeof(THING) * (COUNT_ID)
    + COUNT_ID, sizeof(THING)
    , ...)
    |
    - kvmalloc
    + kvmalloc_array
    (
    - sizeof(THING) * COUNT_ID
    + COUNT_ID, sizeof(THING)
    , ...)
    |
    - kvmalloc
    + kvmalloc_array
    (
    - sizeof(THING) * (COUNT_CONST)
    + COUNT_CONST, sizeof(THING)
    , ...)
    |
    - kvmalloc
    + kvmalloc_array
    (
    - sizeof(THING) * COUNT_CONST
    + COUNT_CONST, sizeof(THING)
    , ...)
    )

    // 2-factor product, only identifiers.
    @@
    identifier SIZE, COUNT;
    @@

    - kvmalloc
    + kvmalloc_array
    (
    - SIZE * COUNT
    + COUNT, SIZE
    , ...)

    // 3-factor product with 1 sizeof(type) or sizeof(expression), with
    // redundant parens removed.
    @@
    expression THING;
    identifier STRIDE, COUNT;
    type TYPE;
    @@

    (
    kvmalloc(
    - sizeof(TYPE) * (COUNT) * (STRIDE)
    + array3_size(COUNT, STRIDE, sizeof(TYPE))
    , ...)
    |
    kvmalloc(
    - sizeof(TYPE) * (COUNT) * STRIDE
    + array3_size(COUNT, STRIDE, sizeof(TYPE))
    , ...)
    |
    kvmalloc(
    - sizeof(TYPE) * COUNT * (STRIDE)
    + array3_size(COUNT, STRIDE, sizeof(TYPE))
    , ...)
    |
    kvmalloc(
    - sizeof(TYPE) * COUNT * STRIDE
    + array3_size(COUNT, STRIDE, sizeof(TYPE))
    , ...)
    |
    kvmalloc(
    - sizeof(THING) * (COUNT) * (STRIDE)
    + array3_size(COUNT, STRIDE, sizeof(THING))
    , ...)
    |
    kvmalloc(
    - sizeof(THING) * (COUNT) * STRIDE
    + array3_size(COUNT, STRIDE, sizeof(THING))
    , ...)
    |
    kvmalloc(
    - sizeof(THING) * COUNT * (STRIDE)
    + array3_size(COUNT, STRIDE, sizeof(THING))
    , ...)
    |
    kvmalloc(
    - sizeof(THING) * COUNT * STRIDE
    + array3_size(COUNT, STRIDE, sizeof(THING))
    , ...)
    )

    // 3-factor product with 2 sizeof(variable), with redundant parens removed.
    @@
    expression THING1, THING2;
    identifier COUNT;
    type TYPE1, TYPE2;
    @@

    (
    kvmalloc(
    - sizeof(TYPE1) * sizeof(TYPE2) * COUNT
    + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
    , ...)
    |
    kvmalloc(
    - sizeof(TYPE1) * sizeof(TYPE2) * (COUNT)
    + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
    , ...)
    |
    kvmalloc(
    - sizeof(THING1) * sizeof(THING2) * COUNT
    + array3_size(COUNT, sizeof(THING1), sizeof(THING2))
    , ...)
    |
    kvmalloc(
    - sizeof(THING1) * sizeof(THING2) * (COUNT)
    + array3_size(COUNT, sizeof(THING1), sizeof(THING2))
    , ...)
    |
    kvmalloc(
    - sizeof(TYPE1) * sizeof(THING2) * COUNT
    + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
    , ...)
    |
    kvmalloc(
    - sizeof(TYPE1) * sizeof(THING2) * (COUNT)
    + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
    , ...)
    )

    // 3-factor product, only identifiers, with redundant parens removed.
    @@
    identifier STRIDE, SIZE, COUNT;
    @@

    (
    kvmalloc(
    - (COUNT) * STRIDE * SIZE
    + array3_size(COUNT, STRIDE, SIZE)
    , ...)
    |
    kvmalloc(
    - COUNT * (STRIDE) * SIZE
    + array3_size(COUNT, STRIDE, SIZE)
    , ...)
    |
    kvmalloc(
    - COUNT * STRIDE * (SIZE)
    + array3_size(COUNT, STRIDE, SIZE)
    , ...)
    |
    kvmalloc(
    - (COUNT) * (STRIDE) * SIZE
    + array3_size(COUNT, STRIDE, SIZE)
    , ...)
    |
    kvmalloc(
    - COUNT * (STRIDE) * (SIZE)
    + array3_size(COUNT, STRIDE, SIZE)
    , ...)
    |
    kvmalloc(
    - (COUNT) * STRIDE * (SIZE)
    + array3_size(COUNT, STRIDE, SIZE)
    , ...)
    |
    kvmalloc(
    - (COUNT) * (STRIDE) * (SIZE)
    + array3_size(COUNT, STRIDE, SIZE)
    , ...)
    |
    kvmalloc(
    - COUNT * STRIDE * SIZE
    + array3_size(COUNT, STRIDE, SIZE)
    , ...)
    )

    // Any remaining multi-factor products, first at least 3-factor products,
    // when they're not all constants...
    @@
    expression E1, E2, E3;
    constant C1, C2, C3;
    @@

    (
    kvmalloc(C1 * C2 * C3, ...)
    |
    kvmalloc(
    - (E1) * E2 * E3
    + array3_size(E1, E2, E3)
    , ...)
    |
    kvmalloc(
    - (E1) * (E2) * E3
    + array3_size(E1, E2, E3)
    , ...)
    |
    kvmalloc(
    - (E1) * (E2) * (E3)
    + array3_size(E1, E2, E3)
    , ...)
    |
    kvmalloc(
    - E1 * E2 * E3
    + array3_size(E1, E2, E3)
    , ...)
    )

    // And then all remaining 2 factors products when they're not all constants,
    // keeping sizeof() as the second factor argument.
    @@
    expression THING, E1, E2;
    type TYPE;
    constant C1, C2, C3;
    @@

    (
    kvmalloc(sizeof(THING) * C2, ...)
    |
    kvmalloc(sizeof(TYPE) * C2, ...)
    |
    kvmalloc(C1 * C2 * C3, ...)
    |
    kvmalloc(C1 * C2, ...)
    |
    - kvmalloc
    + kvmalloc_array
    (
    - sizeof(TYPE) * (E2)
    + E2, sizeof(TYPE)
    , ...)
    |
    - kvmalloc
    + kvmalloc_array
    (
    - sizeof(TYPE) * E2
    + E2, sizeof(TYPE)
    , ...)
    |
    - kvmalloc
    + kvmalloc_array
    (
    - sizeof(THING) * (E2)
    + E2, sizeof(THING)
    , ...)
    |
    - kvmalloc
    + kvmalloc_array
    (
    - sizeof(THING) * E2
    + E2, sizeof(THING)
    , ...)
    |
    - kvmalloc
    + kvmalloc_array
    (
    - (E1) * E2
    + E1, E2
    , ...)
    |
    - kvmalloc
    + kvmalloc_array
    (
    - (E1) * (E2)
    + E1, E2
    , ...)
    |
    - kvmalloc
    + kvmalloc_array
    (
    - E1 * E2
    + E1, E2
    , ...)
    )

    Signed-off-by: Kees Cook

    Kees Cook
     

06 Jun, 2018

2 commits

  • Pull fscrypt updates from Ted Ts'o:
    "Add a bunch of cleanups, and add support for the Speck128/256
    algorithms.

    Yes, Speck is controversial, but the intention is to use them only for
    the lowest end Android devices, where the alternative *really* is no
    encryption at all for data stored at rest"

    * tag 'fscrypt_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/fscrypt:
    fscrypt: log the crypto algorithm implementations
    fscrypt: add Speck128/256 support
    fscrypt: only derive the needed portion of the key
    fscrypt: separate key lookup from key derivation
    fscrypt: use a common logging function
    fscrypt: remove internal key size constants
    fscrypt: remove unnecessary check for non-logon key type
    fscrypt: make fscrypt_operations.max_namelen an integer
    fscrypt: drop empty name check from fname_decrypt()
    fscrypt: drop max_namelen check from fname_decrypt()
    fscrypt: don't special-case EOPNOTSUPP from fscrypt_get_encryption_info()
    fscrypt: don't clear flags on crypto transform
    fscrypt: remove stale comment from fscrypt_d_revalidate()
    fscrypt: remove error messages for skcipher_request_alloc() failure
    fscrypt: remove unnecessary NULL check when allocating skcipher
    fscrypt: clean up after fscrypt_prepare_lookup() conversions
    fs, fscrypt: only define ->s_cop when FS_ENCRYPTION is enabled
    fscrypt: use unbound workqueue for decryption

    Linus Torvalds
     
  • Pull xfs updates from Darrick Wong:
    "New features this cycle include the ability to relabel mounted
    filesystems, support for fallocated swapfiles, and using FUA for pure
    data O_DSYNC directio writes. With this cycle we begin to integrate
    online filesystem repair and refactor the growfs code in preparation
    for eventual subvolume support, though the road ahead for both
    features is quite long.

    There are also numerous refactorings of the iomap code to remove
    unnecessary log overhead, to disentangle some of the quota code, and
    to prepare for buffer head removal in a future upstream kernel.

    Metadata validation continues to improve, both in the hot path
    verifiers and the online filesystem check code. I anticipate sending a
    second pull request in a few days with more metadata validation
    improvements.

    This series has been run through a full xfstests run over the weekend
    and through a quick xfstests run against this morning's master, with
    no major failures reported.

    Summary:

    - Strengthen inode number and structure validation when allocating
    inodes.

    - Reduce pointless buffer allocations during cache miss

    - Use FUA for pure data O_DSYNC directio writes

    - Various iomap refactorings

    - Strengthen quota metadata verification to avoid unfixable broken
    quota

    - Make AGFL block freeing a deferred operation to avoid blowing out
    transaction reservations when running complex operations

    - Get rid of the log item descriptors to reduce log overhead

    - Fix various reflink bugs where inodes were double-joined to
    transactions

    - Don't issue discards when trimming unwritten extents

    - Refactor incore dquot initialization and retrieval interfaces

    - Fix some locking problems in the quota scrub code

    - Strengthen btree structure checks in scrub code

    - Rewrite swapfile activation to use iomap and support unwritten
    extents

    - Make scrub exit to userspace sooner when corruptions or
    cross-referencing problems are found

    - Make scrub invoke the data fork scrubber directly on metadata
    inodes

    - Don't do background reclamation of post-eof and cow blocks when the
    fs is suspended

    - Fix secondary superblock buffer lifespan hinting

    - Refactor growfs to use table-dispatched functions instead of long
    stringy functions

    - Move growfs code to libxfs

    - Implement online fs label getting and setting

    - Introduce online filesystem repair (in a very limited capacity)

    - Fix unit conversion problems in the realtime freemap iteration
    functions

    - Various refactorings and cleanups in preparation to remove buffer
    heads in a future release

    - Reimplement the old bmap call with iomap

    - Remove direct buffer head accesses from seek hole/data

    - Various bug fixes"

    * tag 'xfs-4.18-merge-3' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (121 commits)
    fs: use ->is_partially_uptodate in page_cache_seek_hole_data
    fs: remove the buffer_unwritten check in page_seek_hole_data
    fs: move page_cache_seek_hole_data to iomap.c
    xfs: use iomap_bmap
    iomap: add an iomap-based bmap implementation
    iomap: add a iomap_sector helper
    iomap: use __bio_add_page in iomap_dio_zero
    iomap: move IOMAP_F_BOUNDARY to gfs2
    iomap: fix the comment describing IOMAP_NOWAIT
    iomap: inline data should be an iomap type, not a flag
    mm: split ->readpages calls to avoid non-contiguous pages lists
    mm: return an unsigned int from __do_page_cache_readahead
    mm: give the 'ret' variable a better name __do_page_cache_readahead
    block: add a lower-level bio_add_page interface
    xfs: fix error handling in xfs_refcount_insert()
    xfs: fix xfs_rtalloc_rec units
    xfs: strengthen rtalloc query range checks
    xfs: xfs_rtbuf_get should check the bmapi_read results
    xfs: xfs_rtword_t should be unsigned, not signed
    dax: change bdev_dax_supported() to support boolean returns
    ...

    Linus Torvalds
     

31 May, 2018

2 commits

  • The function return values are confusing with the way the function is
    named. We expect a true or false return value but it actually returns
    0/-errno. This makes the code very confusing. Changing the return values
    to return a bool where if DAX is supported then return true and no DAX
    support returns false.

    Signed-off-by: Dave Jiang
    Signed-off-by: Ross Zwisler
    Reviewed-by: Darrick J. Wong
    Signed-off-by: Darrick J. Wong

    Dave Jiang
     
  • Change bdev_dax_supported so it takes a bdev parameter. This enables
    multi-device filesystems like xfs to check that a dax device can work for
    the particular filesystem. Once that's in place, actually fix all the
    parts of XFS where we need to be able to distinguish between datadev and
    rtdev.

    This patch fixes the problem where we screw up the dax support checking
    in xfs if the datadev and rtdev have different dax capabilities.

    Signed-off-by: Darrick J. Wong
    [rez: Re-added __bdev_dax_supported() for !CONFIG_FS_DAX cases]
    Signed-off-by: Ross Zwisler
    Reviewed-by: Eric Sandeen

    Darrick J. Wong
     

21 May, 2018

2 commits

  • This reserved space isn't committed yet but cannot be used for allocations.
    For userspace it has no difference from used space. XFS already does this.

    Signed-off-by: Konstantin Khlebnikov
    Signed-off-by: Theodore Ts'o
    Reviewed-by: Jan Kara
    Fixes: 689c958cbe6b ("ext4: add project quota support")

    Konstantin Khlebnikov
     
  • Now ->max_namelen() is only called to limit the filename length when
    adding NUL padding, and only for real filenames -- not symlink targets.
    It also didn't give the correct length for symlink targets anyway since
    it forgot to subtract 'sizeof(struct fscrypt_symlink_data)'.

    Thus, change ->max_namelen from a function to a simple 'unsigned int'
    that gives the filesystem's maximum filename length.

    Signed-off-by: Eric Biggers
    Signed-off-by: Theodore Ts'o

    Eric Biggers
     

14 May, 2018

1 commit

  • When remounting ext4 from ro to rw, currently it allows its transition,
    even if ext4_commit_super() returns EIO. Even worse, after that,
    fs/buffer complains about buffer dirty bits like:

    Call trace:
    [] mark_buffer_dirty+0x184/0x1a4
    [] __ext4_handle_dirty_super+0x4c/0xfc
    [] ext4_file_open+0x154/0x1c0
    [] do_dentry_open+0x114/0x2d0
    [] vfs_open+0x5c/0x94
    [] path_openat+0x668/0xfe8
    [] do_filp_open+0x74/0x120
    [] do_sys_open+0x148/0x254
    [] SyS_openat+0x10/0x18
    [] el0_svc_naked+0x24/0x28
    EXT4-fs (dm-1): previous I/O error to superblock detected
    Buffer I/O error on dev dm-1, logical block 0, lost sync page write
    EXT4-fs (dm-1): re-mounted. Opts: (null)
    Buffer I/O error on dev dm-1, logical block 80, lost async page write

    Signed-off-by: Jaegeuk Kim
    Signed-off-by: Theodore Ts'o

    Jaegeuk Kim
     

12 May, 2018

1 commit


26 Apr, 2018

1 commit


30 Mar, 2018

5 commits

  • Previously, mount -l would show data= even if the ext4 default
    journaling mode was being used. Change this to be consistent with the
    rest of the options.

    Ext4 already did the right thing when the journaling mode being used
    matched the one specified in the superblock's default mount options. The
    reason it failed to do the right thing for the ext4 defaults is that,
    when set, they were never included in sbi->s_def_mount_opt (unlike the
    superblock's defaults, which were).

    Signed-off-by: Tyson Nottingham
    Signed-off-by: Theodore Ts'o

    Tyson Nottingham
     
  • Don't show init_itable=n in /proc/fs/ext4/<dev>/options when filesystem
    is mounted with noinit_itable.

    Signed-off-by: Tyson Nottingham
    Signed-off-by: Theodore Ts'o

    Tyson Nottingham
     
  • Previously, /proc/fs/ext4/<dev>/options would only show binary options
    if they were set (1 in the options bit mask). E.g. it would show "grpid"
    if it was set, but it would not show "nogrpid" if grpid was not set.

    This seems sensible, but when an option is absent from the file, it can
    be hard for the unfamiliar to know what is being used. E.g. if there
    isn't a (no)grpid entry, nogrpid is in effect. But if there isn't a
    (no)auto_da_alloc entry, auto_da_alloc is in effect. If there isn't a
    (minixdf|bsddf) entry, it turns out bsddf is in effect. It all depends
    on how the option is implemented.

    It's clearer to be explicit, so print the corresponding option
    regardless of whether it means a 1 or a 0 in the bit mask.

    Note that options which do not have an explicit disable option aren't
    indicated as being disabled even with this change (e.g. dax).

    Signed-off-by: Tyson Nottingham
    Signed-off-by: Theodore Ts'o

    Tyson Nottingham
     
  • If some metadata block, such as an allocation bitmap, overlaps the
    superblock, it's very likely that if the file system is mounted
    read/write, the results will not be pretty. So disallow r/w mounts
    for file systems corrupted in this particular way.

    Signed-off-by: Theodore Ts'o
    Cc: stable@vger.kernel.org

    Theodore Ts'o
     
  • The extended attribute code now uses the crc32c checksum for hashing
    purposes, so we should just always initialize it. We also want
    to prevent NULL pointer dereferences if one of the metadata checksum
    features is enabled after the file system is originally mounted.

    This issue has been assigned CVE-2018-1094.

    https://bugzilla.kernel.org/show_bug.cgi?id=199183
    https://bugzilla.redhat.com/show_bug.cgi?id=1560788

    Signed-off-by: Theodore Ts'o
    Cc: stable@vger.kernel.org

    Theodore Ts'o
     

22 Mar, 2018

2 commits

  • If mount is auto-probing for filesystem type, it will try various
    filesystems in order, with the MS_SILENT flag set. We get
    that flag as the silent arg to ext4_fill_super.

    If we're probing (silent==1) then don't complain about feature
    incompatibilities that are found if it looks like it's actually
    a different valid extN type - failed probes should be silent
    in this case.

    If the on-disk features are unknown even to ext4, then complain.

    Reported-by: Joakim Tjernlund
    Tested-by: Joakim Tjernlund
    Signed-off-by: Eric Sandeen
    Signed-off-by: Theodore Ts'o
    Reviewed-by: Jan Kara

    Eric Sandeen
     
  • Commit 16c54688592c ("ext4: Allow parallel DIO reads") reworked the way
    locking happens around parallel dio reads. This resulted in obviating
    the need for EXT4_STATE_DIOREAD_LOCK flag and accompanying logic.
    Currently this amounts to dead code so let's remove it. No functional
    changes.

    Signed-off-by: Nikolay Borisov
    Signed-off-by: Theodore Ts'o
    Reviewed-by: Jan Kara

    Nikolay Borisov
     

19 Feb, 2018

1 commit


07 Feb, 2018

1 commit

  • Pull libnvdimm updates from Ross Zwisler:

    - Require struct page by default for filesystem DAX to remove a number
    of surprising failure cases. This includes failures with direct I/O,
    gdb and fork(2).

    - Add support for the new Platform Capabilities Structure added to the
    NFIT in ACPI 6.2a. This new table tells us whether the platform
    supports flushing of CPU and memory controller caches on unexpected
    power loss events.

    - Revamp vmem_altmap and dev_pagemap handling to clean up code and
    better support future PCI P2P uses.

    - Deprecate the ND_IOCTL_SMART_THRESHOLD command whose payload has
    become out-of-sync with recent versions of the NVDIMM_FAMILY_INTEL
    spec, and instead rely on the generic ND_CMD_CALL approach used by
    the two other IOCTL families, NVDIMM_FAMILY_{HPE,MSFT}.

    - Enhance nfit_test so we can test some of the new things added in
    version 1.6 of the DSM specification. This includes testing firmware
    download and simulating the Last Shutdown State (LSS) status.

    * tag 'libnvdimm-for-4.16' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (37 commits)
    libnvdimm, namespace: remove redundant initialization of 'nd_mapping'
    acpi, nfit: fix register dimm error handling
    libnvdimm, namespace: make min namespace size 4K
    tools/testing/nvdimm: force nfit_test to depend on instrumented modules
    libnvdimm/nfit_test: adding support for unit testing enable LSS status
    libnvdimm/nfit_test: add firmware download emulation
    nfit-test: Add platform cap support from ACPI 6.2a to test
    libnvdimm: expose platform persistence attribute for nd_region
    acpi: nfit: add persistent memory control flag for nd_region
    acpi: nfit: Add support for detect platform CPU cache flush on power loss
    device-dax: Fix trailing semicolon
    libnvdimm, btt: fix uninitialized err_lock
    dax: require 'struct page' by default for filesystem dax
    ext2: auto disable dax instead of failing mount
    ext4: auto disable dax instead of failing mount
    mm, dax: introduce pfn_t_special()
    mm: Fix devm_memremap_pages() collision handling
    mm: Fix memory size alignment in devm_memremap_pages_release()
    memremap: merge find_dev_pagemap into get_dev_pagemap
    memremap: change devm_memremap_pages interface to use struct dev_pagemap
    ...

    Linus Torvalds
     

05 Feb, 2018

1 commit

  • Pull fscrypt updates from Ted Ts'o:
    "Refactor support for encrypted symlinks to move common code to fscrypt"

    Ted also points out about the merge:
    "This makes the f2fs symlink code use the fscrypt_encrypt_symlink()
    from the fscrypt tree. This will end up dropping the kzalloc() ->
    f2fs_kzalloc() change, which means the fscrypt-specific allocation
    won't get tested by f2fs's kmalloc error injection system; which is
    fine"

    * tag 'fscrypt_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/fscrypt: (26 commits)
    fscrypt: fix build with pre-4.6 gcc versions
    fscrypt: remove 'ci' parameter from fscrypt_put_encryption_info()
    fscrypt: document symlink length restriction
    fscrypt: fix up fscrypt_fname_encrypted_size() for internal use
    fscrypt: define fscrypt_fname_alloc_buffer() to be for presented names
    fscrypt: calculate NUL-padding length in one place only
    fscrypt: move fscrypt_symlink_data to fscrypt_private.h
    fscrypt: remove fscrypt_fname_usr_to_disk()
    ubifs: switch to fscrypt_get_symlink()
    ubifs: switch to fscrypt ->symlink() helper functions
    ubifs: free the encrypted symlink target
    f2fs: switch to fscrypt_get_symlink()
    f2fs: switch to fscrypt ->symlink() helper functions
    ext4: switch to fscrypt_get_symlink()
    ext4: switch to fscrypt ->symlink() helper functions
    fscrypt: new helper function - fscrypt_get_symlink()
    fscrypt: new helper functions for ->symlink()
    fscrypt: trim down fscrypt.h includes
    fscrypt: move fscrypt_is_dot_dotdot() to fs/crypto/fname.c
    fscrypt: move fscrypt_valid_enc_modes() to fscrypt_private.h
    ...

    Linus Torvalds
     

04 Feb, 2018

2 commits

  • Pull hardened usercopy whitelisting from Kees Cook:
    "Currently, hardened usercopy performs dynamic bounds checking on slab
    cache objects. This is good, but still leaves a lot of kernel memory
    available to be copied to/from userspace in the face of bugs.

    To further restrict what memory is available for copying, this creates
    a way to whitelist specific areas of a given slab cache object for
    copying to/from userspace, allowing much finer granularity of access
    control.

    Slab caches that are never exposed to userspace can declare no
    whitelist for their objects, thereby keeping them unavailable to
    userspace via dynamic copy operations. (Note, an implicit form of
    whitelisting is the use of constant sizes in usercopy operations and
    get_user()/put_user(); these bypass all hardened usercopy checks since
    these sizes cannot change at runtime.)

    This new check is WARN-by-default, so any mistakes can be found over
    the next several releases without breaking anyone's system.

    The series has roughly the following sections:
    - remove %p and improve reporting with offset
    - prepare infrastructure and whitelist kmalloc
    - update VFS subsystem with whitelists
    - update SCSI subsystem with whitelists
    - update network subsystem with whitelists
    - update process memory with whitelists
    - update per-architecture thread_struct with whitelists
    - update KVM with whitelists and fix ioctl bug
    - mark all other allocations as not whitelisted
    - update lkdtm for more sensible test coverage"

    * tag 'usercopy-v4.16-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux: (38 commits)
    lkdtm: Update usercopy tests for whitelisting
    usercopy: Restrict non-usercopy caches to size 0
    kvm: x86: fix KVM_XEN_HVM_CONFIG ioctl
    kvm: whitelist struct kvm_vcpu_arch
    arm: Implement thread_struct whitelist for hardened usercopy
    arm64: Implement thread_struct whitelist for hardened usercopy
    x86: Implement thread_struct whitelist for hardened usercopy
    fork: Provide usercopy whitelisting for task_struct
    fork: Define usercopy region in thread_stack slab caches
    fork: Define usercopy region in mm_struct slab caches
    net: Restrict unwhitelisted proto caches to size 0
    sctp: Copy struct sctp_sock.autoclose to userspace using put_user()
    sctp: Define usercopy region in SCTP proto slab cache
    caif: Define usercopy region in caif proto slab cache
    ip: Define usercopy region in IP proto slab cache
    net: Define usercopy region in struct proto slab cache
    scsi: Define usercopy region in scsi_sense_cache slab cache
    cifs: Define usercopy region in cifs_request slab cache
    vxfs: Define usercopy region in vxfs_inode slab cache
    ufs: Define usercopy region in ufs_inode_cache slab cache
    ...

    Linus Torvalds
     
  • Pull ext4 updates from Ted Ts'o:
    "Only miscellaneous cleanups and bug fixes for ext4 this cycle"

    * tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
    ext4: create ext4_kset dynamically
    ext4: create ext4_feat kobject dynamically
    ext4: release kobject/kset even when init/register fail
    ext4: fix incorrect indentation of if statement
    ext4: correct documentation for grpid mount option
    ext4: use 'sbi' instead of 'EXT4_SB(sb)'
    ext4: save error to disk in __ext4_grp_locked_error()
    jbd2: fix sphinx kernel-doc build warnings
    ext4: fix a race in the ext4 shutdown path
    mbcache: make sure c_entry_count is not decremented past zero
    ext4: no need flush workqueue before destroying it
    ext4: fixed alignment and minor code cleanup in ext4.h
    ext4: fix ENOSPC handling in DAX page fault handler
    dax: pass detailed error code from dax_iomap_fault()
    mbcache: revert "fs/mbcache.c: make count_objects() more robust"
    mbcache: initialize entry->e_referenced in mb_cache_entry_create()
    ext4: fix up remaining files with SPDX cleanups

    Linus Torvalds
     

29 Jan, 2018

1 commit


20 Jan, 2018

1 commit

  • Bring the ext4 filesystem in line with xfs that only warns and continues
    when the "-o dax" option is specified to mount and the backing device
    does not support dax. This is in preparation for removing dax support
    from devices that do not enable get_user_pages() operations on dax
    mappings. In other words 'gup' support is required and configurations
    that were using so called 'page-less' dax will be converted back to
    using the page cache.

    Removing the broken 'page-less' dax support is a pre-requisite for
    removing the "EXPERIMENTAL" warning when mounting a filesystem in dax
    mode.

    Reviewed-by: Jan Kara
    Signed-off-by: Dan Williams

    Dan Williams
     

16 Jan, 2018

1 commit

  • The ext4 symlink pathnames, stored in struct ext4_inode_info.i_data
    and therefore contained in the ext4_inode_cache slab cache, need
    to be copied to/from userspace.

    cache object allocation:
    fs/ext4/super.c:
    ext4_alloc_inode(...):
    struct ext4_inode_info *ei;
    ...
    ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
    ...
    return &ei->vfs_inode;

    include/trace/events/ext4.h:
    #define EXT4_I(inode) \
    (container_of(inode, struct ext4_inode_info, vfs_inode))

    fs/ext4/namei.c:
    ext4_symlink(...):
    ...
    inode->i_link = (char *)&EXT4_I(inode)->i_data;

    example usage trace:
    readlink_copy+0x43/0x70
    vfs_readlink+0x62/0x110
    SyS_readlinkat+0x100/0x130

    fs/namei.c:
    readlink_copy(..., link):
    ...
    copy_to_user(..., link, len)

    (inlined into vfs_readlink)
    generic_readlink(dentry, ...):
    struct inode *inode = d_inode(dentry);
    const char *link = inode->i_link;
    ...
    readlink_copy(..., link);

    In support of usercopy hardening, this patch defines a region in the
    ext4_inode_cache slab cache in which userspace copy operations are
    allowed.

    This region is known as the slab cache's usercopy region. Slab caches
    can now check that each dynamically sized copy operation involving
    cache-managed memory falls entirely within the slab's usercopy region.

    This patch is modified from Brad Spengler/PaX Team's PAX_USERCOPY
    whitelisting code in the last public patch of grsecurity/PaX based on my
    understanding of the code. Changes or omissions from the original code are
    mine and don't reflect the original grsecurity/PaX code.

    Signed-off-by: David Windsor
    [kees: adjust commit log, provide usage trace]
    Cc: "Theodore Ts'o"
    Cc: Andreas Dilger
    Cc: linux-ext4@vger.kernel.org
    Signed-off-by: Kees Cook

    David Windsor
     

12 Jan, 2018

2 commits

  • fscrypt_put_encryption_info() is only called when evicting an inode, so
    the 'struct fscrypt_info *ci' parameter is always NULL, and there cannot
    be races with other threads. This was cruft left over from the broken
    key revocation code. Remove the unused parameter and the cmpxchg().

    Also remove the #ifdefs around the fscrypt_put_encryption_info() calls,
    since fscrypt_notsupp.h defines a no-op stub for it.

    Signed-off-by: Eric Biggers
    Signed-off-by: Theodore Ts'o

    Eric Biggers
     
  • We could use 'sbi' instead of 'EXT4_SB(sb)' to make code more elegant.

    Signed-off-by: Jun Piao
    Signed-off-by: Theodore Ts'o
    Reviewed-by: Jan Kara

    Jun Piao
     

10 Jan, 2018

2 commits

  • In the function __ext4_grp_locked_error(), __save_error_info()
    is called to save error info in the superblock, but it does not sync
    that information to disk to inform the subsequent fsck after reboot.

    This patch writes the error information to disk. After this patch,
    I think there are no obvious EXT4 error-handling branches that lead to
    "Remounting filesystem read-only" and leave the disk partition to miss
    the subsequent fsck.

    Signed-off-by: Zhouyi Zhou
    Signed-off-by: Theodore Ts'o
    Cc: stable@vger.kernel.org

    Zhouyi Zhou
     
  • destroy_workqueue() will do flushing work for us.

    Signed-off-by: Jun Piao
    Signed-off-by: Theodore Ts'o
    Reviewed-by: Jan Kara

    piaojun
     

18 Dec, 2017

1 commit

  • A number of ext4 source files were skipped because their copyright
    permission statements didn't match the expected text used by the
    automated conversion utilities. I've added SPDX tags for the rest.

    While looking at some of these files, I've noticed that we have quite
    a bit of variation on the licenses that were used --- in particular
    some of the Red Hat licenses on the jbd2 files use a GPL2+ license,
    and we have some files that have a LGPL-2.1 license (which was quite
    surprising).

    I've not attempted to do any license changes. Even if it is perfectly
    legal to relicense to GPL 2.0-only for consistency's sake, that should
    be done with ext4 developer community discussion.

    Signed-off-by: Theodore Ts'o

    Theodore Ts'o
     

28 Nov, 2017

1 commit

  • This is a pure automated search-and-replace of the internal kernel
    superblock flags.

    The s_flags are now called SB_*, with the names and the values for the
    moment mirroring the MS_* flags that they're equivalent to.

    Note how the MS_xyz flags are the ones passed to the mount system call,
    while the SB_xyz flags are what we then use in sb->s_flags.

    The script to do this was:

    # places to look in; re security/*: it generally should *not* be
    # touched (that stuff parses mount(2) arguments directly), but
    # there are two places where we really deal with superblock flags.
    FILES="drivers/mtd drivers/staging/lustre fs ipc mm \
    include/linux/fs.h include/uapi/linux/bfs_fs.h \
    security/apparmor/apparmorfs.c security/apparmor/include/lib.h"
    # the list of MS_... constants
    SYMS="RDONLY NOSUID NODEV NOEXEC SYNCHRONOUS REMOUNT MANDLOCK \
    DIRSYNC NOATIME NODIRATIME BIND MOVE REC VERBOSE SILENT \
    POSIXACL UNBINDABLE PRIVATE SLAVE SHARED RELATIME KERNMOUNT \
    I_VERSION STRICTATIME LAZYTIME SUBMOUNT NOREMOTELOCK NOSEC BORN \
    ACTIVE NOUSER"

    SED_PROG=
    for i in $SYMS; do SED_PROG="$SED_PROG -e s/MS_$i/SB_$i/g"; done

    # we want files that contain at least one of MS_...,
    # with fs/namespace.c and fs/pnode.c excluded.
    L=$(for i in $SYMS; do git grep -w -l MS_$i $FILES; done| sort|uniq|grep -v '^fs/namespace.c'|grep -v '^fs/pnode.c')

    for f in $L; do sed -i $f $SED_PROG; done

    Requested-by: Al Viro
    Signed-off-by: Linus Torvalds

    Linus Torvalds
     

15 Nov, 2017

2 commits

  • Pull ext4 updates from Ted Ts'o:

    - Add support for online resizing of file systems with bigalloc

    - Fix two data corruption bugs involving DAX, as well as a corruption
    bug after a crash during a racing fallocate and delayed allocation.

    - Finally, a number of cleanups and optimizations.

    * tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
    ext4: improve smp scalability for inode generation
    ext4: add support for online resizing with bigalloc
    ext4: mention noload when recovering on read-only device
    Documentation: fix little inconsistencies
    ext4: convert timers to use timer_setup()
    jbd2: convert timers to use timer_setup()
    ext4: remove duplicate extended attributes defs
    ext4: add ext4_should_use_dax()
    ext4: add sanity check for encryption + DAX
    ext4: prevent data corruption with journaling + DAX
    ext4: prevent data corruption with inline data + DAX
    ext4: fix interaction between i_size, fallocate, and delalloc after a crash
    ext4: retry allocations conservatively
    ext4: Switch to iomap for SEEK_HOLE / SEEK_DATA
    ext4: Add iomap support for inline data
    iomap: Add IOMAP_F_DATA_INLINE flag
    iomap: Switch from blkno to disk offset

    Linus Torvalds
     
  • Pull fscrypt updates from Ted Ts'o:
    "Lots of cleanups, mostly courtesy of Eric Biggers"

    * tag 'fscrypt-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/fscrypt:
    fscrypt: lock mutex before checking for bounce page pool
    fscrypt: add a documentation file for filesystem-level encryption
    ext4: switch to fscrypt_prepare_setattr()
    ext4: switch to fscrypt_prepare_lookup()
    ext4: switch to fscrypt_prepare_rename()
    ext4: switch to fscrypt_prepare_link()
    ext4: switch to fscrypt_file_open()
    fscrypt: new helper function - fscrypt_prepare_setattr()
    fscrypt: new helper function - fscrypt_prepare_lookup()
    fscrypt: new helper function - fscrypt_prepare_rename()
    fscrypt: new helper function - fscrypt_prepare_link()
    fscrypt: new helper function - fscrypt_file_open()
    fscrypt: new helper function - fscrypt_require_key()
    fscrypt: remove unneeded empty fscrypt_operations structs
    fscrypt: remove ->is_encrypted()
    fscrypt: switch from ->is_encrypted() to IS_ENCRYPTED()
    fs, fscrypt: add an S_ENCRYPTED inode flag
    fscrypt: clean up include file mess

    Linus Torvalds
     

09 Nov, 2017

1 commit


19 Oct, 2017

6 commits

  • In the case where a filesystem has been configured without encryption
    support, there is no longer any need to initialize ->s_cop at all, since
    none of the methods are ever called.

    Reviewed-by: Chao Yu
    Acked-by: Dave Chinner
    Signed-off-by: Eric Biggers
    Signed-off-by: Theodore Ts'o

    Eric Biggers
     
  • Now that all callers of fscrypt_operations.is_encrypted() have been
    switched to IS_ENCRYPTED(), remove ->is_encrypted().

    Reviewed-by: Chao Yu
    Acked-by: Dave Chinner
    Signed-off-by: Eric Biggers
    Signed-off-by: Theodore Ts'o

    Eric Biggers
     
  • Introduce a flag S_ENCRYPTED which can be set in ->i_flags to indicate
    that the inode is encrypted using the fscrypt (fs/crypto/) mechanism.

    Checking this flag will give the same information that
    inode->i_sb->s_cop->is_encrypted(inode) currently does, but will be more
    efficient. This will be useful for adding higher-level helper functions
    for filesystems to use. For example we'll be able to replace this:

    if (ext4_encrypted_inode(inode)) {
    ret = fscrypt_get_encryption_info(inode);
    if (ret)
    return ret;
    if (!fscrypt_has_encryption_key(inode))
    return -ENOKEY;
    }

    with this:

    ret = fscrypt_require_key(inode);
    if (ret)
    return ret;

    ... since we'll be able to retain the fast path for unencrypted files as
    a single flag check, using an inline function. This wasn't possible
    before because we'd have had to frequently call through the
    ->i_sb->s_cop->is_encrypted function pointer, even when the encryption
    support was disabled or not being used.

    Note: we don't define S_ENCRYPTED to 0 if CONFIG_FS_ENCRYPTION is
    disabled because we want to continue to return an error if an encrypted
    file is accessed without encryption support, rather than pretending that
    it is unencrypted.

    Reviewed-by: Chao Yu
    Acked-by: Dave Chinner
    Signed-off-by: Eric Biggers
    Signed-off-by: Theodore Ts'o

    Eric Biggers
     
  • [AV: in addition to the fix in previous commit]

    Signed-off-by: Matthew Garrett
    Cc: David Howells
    Cc: Alexander Viro
    Reviewed-by: David Howells
    Signed-off-by: Al Viro

    Matthew Garrett
     
  • Help the user to find the appropriate mount option to continue mounting
    the file system on a read-only device if the journal requires recovery.

    Signed-off-by: Simon Ruderich
    Signed-off-by: Theodore Ts'o

    Simon Ruderich
     
  • In preparation for unconditionally passing the struct timer_list pointer to
    all timer callbacks, switch to using the new timer_setup() and from_timer()
    to pass the timer pointer explicitly.

    Signed-off-by: Kees Cook
    Signed-off-by: Theodore Ts'o
    Reviewed-by: Jan Kara
    Cc: Andreas Dilger
    Cc: linux-ext4@vger.kernel.org

    Kees Cook