Eric Lee / smarc-fsl-linux-kernel

13 Jun, 2018

1 commit

344476e16 treewide: kvmalloc() -> kvmalloc_array() ... Browse Code »

The kvmalloc() function has a 2-factor argument form, kvmalloc_array(). This
patch replaces cases of:

kvmalloc(a * b, gfp)

with:
kvmalloc_array(a, b, gfp)

as well as handling cases of:

kvmalloc(a * b * c, gfp)

with:

kvmalloc(array3_size(a, b, c), gfp)

as it's slightly less ugly than:

kvmalloc_array(array_size(a, b), c, gfp)

This does, however, attempt to ignore constant size factors like:

kvmalloc(4 * 1024, gfp)

though any constants defined via macros get caught up in the conversion.

Any factors with a sizeof() of "unsigned char", "char", and "u8" were
dropped, since they're redundant.

The Coccinelle script used for this was:

// Fix redundant parens around sizeof().
@@
type TYPE;
expression THING, E;
@@

(
kvmalloc(
- (sizeof(TYPE)) * E
+ sizeof(TYPE) * E
, ...)
|
kvmalloc(
- (sizeof(THING)) * E
+ sizeof(THING) * E
, ...)
)

// Drop single-byte sizes and redundant parens.
@@
expression COUNT;
typedef u8;
typedef __u8;
@@

(
kvmalloc(
- sizeof(u8) * (COUNT)
+ COUNT
, ...)
|
kvmalloc(
- sizeof(__u8) * (COUNT)
+ COUNT
, ...)
|
kvmalloc(
- sizeof(char) * (COUNT)
+ COUNT
, ...)
|
kvmalloc(
- sizeof(unsigned char) * (COUNT)
+ COUNT
, ...)
|
kvmalloc(
- sizeof(u8) * COUNT
+ COUNT
, ...)
|
kvmalloc(
- sizeof(__u8) * COUNT
+ COUNT
, ...)
|
kvmalloc(
- sizeof(char) * COUNT
+ COUNT
, ...)
|
kvmalloc(
- sizeof(unsigned char) * COUNT
+ COUNT
, ...)
)

// 2-factor product with sizeof(type/expression) and identifier or constant.
@@
type TYPE;
expression THING;
identifier COUNT_ID;
constant COUNT_CONST;
@@

(
- kvmalloc
+ kvmalloc_array
(
- sizeof(TYPE) * (COUNT_ID)
+ COUNT_ID, sizeof(TYPE)
, ...)
|
- kvmalloc
+ kvmalloc_array
(
- sizeof(TYPE) * COUNT_ID
+ COUNT_ID, sizeof(TYPE)
, ...)
|
- kvmalloc
+ kvmalloc_array
(
- sizeof(TYPE) * (COUNT_CONST)
+ COUNT_CONST, sizeof(TYPE)
, ...)
|
- kvmalloc
+ kvmalloc_array
(
- sizeof(TYPE) * COUNT_CONST
+ COUNT_CONST, sizeof(TYPE)
, ...)
|
- kvmalloc
+ kvmalloc_array
(
- sizeof(THING) * (COUNT_ID)
+ COUNT_ID, sizeof(THING)
, ...)
|
- kvmalloc
+ kvmalloc_array
(
- sizeof(THING) * COUNT_ID
+ COUNT_ID, sizeof(THING)
, ...)
|
- kvmalloc
+ kvmalloc_array
(
- sizeof(THING) * (COUNT_CONST)
+ COUNT_CONST, sizeof(THING)
, ...)
|
- kvmalloc
+ kvmalloc_array
(
- sizeof(THING) * COUNT_CONST
+ COUNT_CONST, sizeof(THING)
, ...)
)

// 2-factor product, only identifiers.
@@
identifier SIZE, COUNT;
@@

- kvmalloc
+ kvmalloc_array
(
- SIZE * COUNT
+ COUNT, SIZE
, ...)

// 3-factor product with 1 sizeof(type) or sizeof(expression), with
// redundant parens removed.
@@
expression THING;
identifier STRIDE, COUNT;
type TYPE;
@@

(
kvmalloc(
- sizeof(TYPE) * (COUNT) * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kvmalloc(
- sizeof(TYPE) * (COUNT) * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kvmalloc(
- sizeof(TYPE) * COUNT * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kvmalloc(
- sizeof(TYPE) * COUNT * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kvmalloc(
- sizeof(THING) * (COUNT) * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
kvmalloc(
- sizeof(THING) * (COUNT) * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
kvmalloc(
- sizeof(THING) * COUNT * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
kvmalloc(
- sizeof(THING) * COUNT * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
)

// 3-factor product with 2 sizeof(variable), with redundant parens removed.
@@
expression THING1, THING2;
identifier COUNT;
type TYPE1, TYPE2;
@@

(
kvmalloc(
- sizeof(TYPE1) * sizeof(TYPE2) * COUNT
+ array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
, ...)
|
kvmalloc(
- sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
, ...)
|
kvmalloc(
- sizeof(THING1) * sizeof(THING2) * COUNT
+ array3_size(COUNT, sizeof(THING1), sizeof(THING2))
, ...)
|
kvmalloc(
- sizeof(THING1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(THING1), sizeof(THING2))
, ...)
|
kvmalloc(
- sizeof(TYPE1) * sizeof(THING2) * COUNT
+ array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
, ...)
|
kvmalloc(
- sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
, ...)
)

// 3-factor product, only identifiers, with redundant parens removed.
@@
identifier STRIDE, SIZE, COUNT;
@@

(
kvmalloc(
- (COUNT) * STRIDE * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kvmalloc(
- COUNT * (STRIDE) * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kvmalloc(
- COUNT * STRIDE * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kvmalloc(
- (COUNT) * (STRIDE) * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kvmalloc(
- COUNT * (STRIDE) * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kvmalloc(
- (COUNT) * STRIDE * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kvmalloc(
- (COUNT) * (STRIDE) * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kvmalloc(
- COUNT * STRIDE * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
)

// Any remaining multi-factor products, first at least 3-factor products,
// when they're not all constants...
@@
expression E1, E2, E3;
constant C1, C2, C3;
@@

(
kvmalloc(C1 * C2 * C3, ...)
|
kvmalloc(
- (E1) * E2 * E3
+ array3_size(E1, E2, E3)
, ...)
|
kvmalloc(
- (E1) * (E2) * E3
+ array3_size(E1, E2, E3)
, ...)
|
kvmalloc(
- (E1) * (E2) * (E3)
+ array3_size(E1, E2, E3)
, ...)
|
kvmalloc(
- E1 * E2 * E3
+ array3_size(E1, E2, E3)
, ...)
)

// And then all remaining 2 factors products when they're not all constants,
// keeping sizeof() as the second factor argument.
@@
expression THING, E1, E2;
type TYPE;
constant C1, C2, C3;
@@

(
kvmalloc(sizeof(THING) * C2, ...)
|
kvmalloc(sizeof(TYPE) * C2, ...)
|
kvmalloc(C1 * C2 * C3, ...)
|
kvmalloc(C1 * C2, ...)
|
- kvmalloc
+ kvmalloc_array
(
- sizeof(TYPE) * (E2)
+ E2, sizeof(TYPE)
, ...)
|
- kvmalloc
+ kvmalloc_array
(
- sizeof(TYPE) * E2
+ E2, sizeof(TYPE)
, ...)
|
- kvmalloc
+ kvmalloc_array
(
- sizeof(THING) * (E2)
+ E2, sizeof(THING)
, ...)
|
- kvmalloc
+ kvmalloc_array
(
- sizeof(THING) * E2
+ E2, sizeof(THING)
, ...)
|
- kvmalloc
+ kvmalloc_array
(
- (E1) * E2
+ E1, E2
, ...)
|
- kvmalloc
+ kvmalloc_array
(
- (E1) * (E2)
+ E1, E2
, ...)
|
- kvmalloc
+ kvmalloc_array
(
- E1 * E2
+ E1, E2
, ...)
)

Signed-off-by: Kees Cook

Kees Cook
2018-06-13 07:19:22 +0800

06 Jun, 2018

2 commits

fd59ccc53 Merge tag 'fscrypt_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/fscrypt ... Browse Code »

Pull fscrypt updates from Ted Ts'o:
"Add a bunch of cleanups, and add support for the Speck128/256
algorithms.

Yes, Speck is controversial, but the intention is to use them only for
the lowest end Android devices, where the alternative *really* is no
encryption at all for data stored at rest"

* tag 'fscrypt_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/fscrypt:
fscrypt: log the crypto algorithm implementations
fscrypt: add Speck128/256 support
fscrypt: only derive the needed portion of the key
fscrypt: separate key lookup from key derivation
fscrypt: use a common logging function
fscrypt: remove internal key size constants
fscrypt: remove unnecessary check for non-logon key type
fscrypt: make fscrypt_operations.max_namelen an integer
fscrypt: drop empty name check from fname_decrypt()
fscrypt: drop max_namelen check from fname_decrypt()
fscrypt: don't special-case EOPNOTSUPP from fscrypt_get_encryption_info()
fscrypt: don't clear flags on crypto transform
fscrypt: remove stale comment from fscrypt_d_revalidate()
fscrypt: remove error messages for skcipher_request_alloc() failure
fscrypt: remove unnecessary NULL check when allocating skcipher
fscrypt: clean up after fscrypt_prepare_lookup() conversions
fs, fscrypt: only define ->s_cop when FS_ENCRYPTION is enabled
fscrypt: use unbound workqueue for decryption

Linus Torvalds
2018-06-06 06:15:32 +0800
6567af78a Merge tag 'xfs-4.18-merge-3' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux ... Browse Code »

Pull xfs updates from Darrick Wong:
"New features this cycle include the ability to relabel mounted
filesystems, support for fallocated swapfiles, and using FUA for pure
data O_DSYNC directio writes. With this cycle we begin to integrate
online filesystem repair and refactor the growfs code in preparation
for eventual subvolume support, though the road ahead for both
features is quite long.

There are also numerous refactorings of the iomap code to remove
unnecessary log overhead, to disentangle some of the quota code, and
to prepare for buffer head removal in a future upstream kernel.

Metadata validation continues to improve, both in the hot path
verifiers and the online filesystem check code. I anticipate sending a
second pull request in a few days with more metadata validation
improvements.

This series has been run through a full xfstests run over the weekend
and through a quick xfstests run against this morning's master, with
no major failures reported.

Summary:

- Strengthen inode number and structure validation when allocating
inodes.

- Reduce pointless buffer allocations during cache miss

- Use FUA for pure data O_DSYNC directio writes

- Various iomap refactorings

- Strengthen quota metadata verification to avoid unfixable broken
quota

- Make AGFL block freeing a deferred operation to avoid blowing out
transaction reservations when running complex operations

- Get rid of the log item descriptors to reduce log overhead

- Fix various reflink bugs where inodes were double-joined to
transactions

- Don't issue discards when trimming unwritten extents

- Refactor incore dquot initialization and retrieval interfaces

- Fix some locking problems in the quota scrub code

- Strengthen btree structure checks in scrub code

- Rewrite swapfile activation to use iomap and support unwritten
extents

- Make scrub exit to userspace sooner when corruptions or
cross-referencing problems are found

- Make scrub invoke the data fork scrubber directly on metadata
inodes

- Don't do background reclamation of post-eof and cow blocks when the
fs is suspended

- Fix secondary superblock buffer lifespan hinting

- Refactor growfs to use table-dispatched functions instead of long
stringy functions

- Move growfs code to libxfs

- Implement online fs label getting and setting

- Introduce online filesystem repair (in a very limited capacity)

- Fix unit conversion problems in the realtime freemap iteration
functions

- Various refactorings and cleanups in preparation to remove buffer
heads in a future release

- Reimplement the old bmap call with iomap

- Remove direct buffer head accesses from seek hole/data

- Various bug fixes"

* tag 'xfs-4.18-merge-3' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (121 commits)
fs: use ->is_partially_uptodate in page_cache_seek_hole_data
fs: remove the buffer_unwritten check in page_seek_hole_data
fs: move page_cache_seek_hole_data to iomap.c
xfs: use iomap_bmap
iomap: add an iomap-based bmap implementation
iomap: add a iomap_sector helper
iomap: use __bio_add_page in iomap_dio_zero
iomap: move IOMAP_F_BOUNDARY to gfs2
iomap: fix the comment describing IOMAP_NOWAIT
iomap: inline data should be an iomap type, not a flag
mm: split ->readpages calls to avoid non-contiguous pages lists
mm: return an unsigned int from __do_page_cache_readahead
mm: give the 'ret' variable a better name __do_page_cache_readahead
block: add a lower-level bio_add_page interface
xfs: fix error handling in xfs_refcount_insert()
xfs: fix xfs_rtalloc_rec units
xfs: strengthen rtalloc query range checks
xfs: xfs_rtbuf_get should check the bmapi_read results
xfs: xfs_rtword_t should be unsigned, not signed
dax: change bdev_dax_supported() to support boolean returns
...

Linus Torvalds
2018-06-06 04:24:20 +0800

31 May, 2018

2 commits

80660f202 dax: change bdev_dax_supported() to support boolean returns ... Browse Code »

The function's return values are confusing given the way the function is
named. We expect a true or false return value, but it actually returns
0/-errno, which makes the code very confusing. Change the function to
return a bool: true if DAX is supported, and false if it is not.

Signed-off-by: Dave Jiang
Signed-off-by: Ross Zwisler
Reviewed-by: Darrick J. Wong
Signed-off-by: Darrick J. Wong

Dave Jiang
2018-05-31 23:58:34 +0800
ba23cba9b fs: allow per-device dax status checking for filesystems ... Browse Code »

Change bdev_dax_supported so it takes a bdev parameter. This enables
multi-device filesystems like xfs to check that a dax device can work for
the particular filesystem. Once that's in place, actually fix all the
parts of XFS where we need to be able to distinguish between datadev and
rtdev.

This patch fixes the problem where we screw up the dax support checking
in xfs if the datadev and rtdev have different dax capabilities.

Signed-off-by: Darrick J. Wong
[rez: Re-added __bdev_dax_supported() for !CONFIG_FS_DAX cases]
Signed-off-by: Ross Zwisler
Reviewed-by: Eric Sandeen

Darrick J. Wong
2018-05-31 23:58:33 +0800

21 May, 2018

2 commits

f06925c73 ext4: report delalloc reserve as non-free in statfs for project quota ... Browse Code »

This reserved space isn't committed yet but cannot be used for allocations.
For userspace it has no difference from used space. XFS already does this.

Signed-off-by: Konstantin Khlebnikov
Signed-off-by: Theodore Ts'o
Reviewed-by: Jan Kara
Fixes: 689c958cbe6b ("ext4: add project quota support")

Konstantin Khlebnikov
2018-05-21 10:49:54 +0800
e12ee6836 fscrypt: make fscrypt_operations.max_namelen an integer ... Browse Code »

Now ->max_namelen() is only called to limit the filename length when
adding NUL padding, and only for real filenames -- not symlink targets.
It also didn't give the correct length for symlink targets anyway since
it forgot to subtract 'sizeof(struct fscrypt_symlink_data)'.

Thus, change ->max_namelen from a function to a simple 'unsigned int'
that gives the filesystem's maximum filename length.

Signed-off-by: Eric Biggers
Signed-off-by: Theodore Ts'o

Eric Biggers
2018-05-21 04:21:03 +0800

14 May, 2018

1 commit

c89128a00 ext4: handle errors on ext4_commit_super ... Browse Code »

When remounting ext4 from ro to rw, the transition is currently allowed
even if ext4_commit_super() returns EIO. Even worse, after that,
fs/buffer complains about buffer dirty bits, like:

Call trace:
[] mark_buffer_dirty+0x184/0x1a4
[] __ext4_handle_dirty_super+0x4c/0xfc
[] ext4_file_open+0x154/0x1c0
[] do_dentry_open+0x114/0x2d0
[] vfs_open+0x5c/0x94
[] path_openat+0x668/0xfe8
[] do_filp_open+0x74/0x120
[] do_sys_open+0x148/0x254
[] SyS_openat+0x10/0x18
[] el0_svc_naked+0x24/0x28
EXT4-fs (dm-1): previous I/O error to superblock detected
Buffer I/O error on dev dm-1, logical block 0, lost sync page write
EXT4-fs (dm-1): re-mounted. Opts: (null)
Buffer I/O error on dev dm-1, logical block 80, lost async page write

Signed-off-by: Jaegeuk Kim
Signed-off-by: Theodore Ts'o

Jaegeuk Kim
2018-05-14 11:02:19 +0800

12 May, 2018

1 commit

db79e6d1f ext4: add new ext4_mark_group_bitmap_corrupted() helper ... Browse Code »

Since there are many places that set the inode/block bitmap
corrupt bit, add a new helper for it, which will make
the code clearer.

Signed-off-by: Wang Shilong
Signed-off-by: Theodore Ts'o
Reviewed-by: Andreas Dilger

Wang Shilong
2018-05-12 23:39:40 +0800

26 Apr, 2018

1 commit

7ef79ad52 ext4: add MODULE_SOFTDEP to ensure crc32c is included in the initramfs ... Browse Code »

Fixes: a45403b51582 ("ext4: always initialize the crc32c checksum driver")
Reported-by: François Valenduc
Signed-off-by: Theodore Ts'o
Cc: stable@vger.kernel.org

Theodore Ts'o
2018-04-26 12:44:46 +0800

30 Mar, 2018

5 commits

27f394a77 ext4: don't show data=<mode> option if defaulted ... Browse Code »

Previously, mount -l would show data= even if the ext4 default
journaling mode was being used. Change this to be consistent with the
rest of the options.

Ext4 already did the right thing when the journaling mode being used
matched the one specified in the superblock's default mount options. The
reason it failed to do the right thing for the ext4 defaults is that,
when set, they were never included in sbi->s_def_mount_opt (unlike the
superblock's defaults, which were).

Signed-off-by: Tyson Nottingham
Signed-off-by: Theodore Ts'o

Tyson Nottingham
2018-03-30 12:56:10 +0800
ceec03764 ext4: omit init_itable=n in procfs when disabled ... Browse Code »

Don't show init_itable=n in /proc/fs/ext4/<dev>/options when filesystem
is mounted with noinit_itable.

Signed-off-by: Tyson Nottingham
Signed-off-by: Theodore Ts'o

Tyson Nottingham
2018-03-30 12:53:33 +0800
68afa7e08 ext4: show more binary mount options in procfs ... Browse Code »

Previously, /proc/fs/ext4/<dev>/options would only show binary options
if they were set (1 in the options bit mask). E.g. it would show "grpid"
if it was set, but it would not show "nogrpid" if grpid was not set.

This seems sensible, but when an option is absent from the file, it can
be hard for the unfamiliar to know what is being used. E.g. if there
isn't a (no)grpid entry, nogrpid is in effect. But if there isn't a
(no)auto_da_alloc entry, auto_da_alloc is in effect. If there isn't a
(minixdf|bsddf) entry, it turns out bsddf is in effect. It all depends
on how the option is implemented.

It's clearer to be explicit, so print the corresponding option
regardless of whether it means a 1 or a 0 in the bit mask.

Note that options which do not have an explicit disable option aren't
indicated as being disabled even with this change (e.g. dax).

Signed-off-by: Tyson Nottingham
Signed-off-by: Theodore Ts'o

Tyson Nottingham
2018-03-30 12:51:10 +0800
18db4b4e6 ext4: don't allow r/w mounts if metadata blocks overlap the superblock ... Browse Code »

If some metadata block, such as an allocation bitmap, overlaps the
superblock, it's very likely that if the file system is mounted
read/write, the results will not be pretty. So disallow r/w mounts
for file systems corrupted in this particular way.

Signed-off-by: Theodore Ts'o
Cc: stable@vger.kernel.org

Theodore Ts'o
2018-03-30 10:10:35 +0800
a45403b51 ext4: always initialize the crc32c checksum driver ... Browse Code »

The extended attribute code now uses the crc32c checksum for hashing
purposes, so we should just always initialize it. We also want
to prevent NULL pointer dereferences if one of the metadata checksum
features is enabled after the file system is originally mounted.

This issue has been assigned CVE-2018-1094.

https://bugzilla.kernel.org/show_bug.cgi?id=199183
https://bugzilla.redhat.com/show_bug.cgi?id=1560788

Signed-off-by: Theodore Ts'o
Cc: stable@vger.kernel.org

Theodore Ts'o
2018-03-30 10:10:31 +0800

22 Mar, 2018

2 commits

0d9366d67 ext4: don't complain about incorrect features when probing ... Browse Code »

If mount is auto-probing for filesystem type, it will try various
filesystems in order, with the MS_SILENT flag set. We get
that flag as the silent arg to ext4_fill_super.

If we're probing (silent==1) then don't complain about feature
incompatibilities that are found if it looks like it's actually
a different valid extN type - failed probes should be silent
in this case.

If the on-disk features are unknown even to ext4, then complain.

Reported-by: Joakim Tjernlund
Tested-by: Joakim Tjernlund
Signed-off-by: Eric Sandeen
Signed-off-by: Theodore Ts'o
Reviewed-by: Jan Kara

Eric Sandeen
2018-03-22 23:59:00 +0800
1d39834fb ext4: remove EXT4_STATE_DIOREAD_LOCK flag ... Browse Code »

Commit 16c54688592c ("ext4: Allow parallel DIO reads") reworked the way
locking happens around parallel dio reads. This resulted in obviating
the need for EXT4_STATE_DIOREAD_LOCK flag and accompanying logic.
Currently this amounts to dead code so let's remove it. No functional
changes

Signed-off-by: Nikolay Borisov
Signed-off-by: Theodore Ts'o
Reviewed-by: Jan Kara

Nikolay Borisov
2018-03-22 23:52:10 +0800

19 Feb, 2018

1 commit

ccf0f32ac ext4: add tracepoints for shutdown and file system errors ... Browse Code »

Signed-off-by: Theodore Ts'o

Theodore Ts'o
2018-02-19 09:53:23 +0800

07 Feb, 2018

1 commit

3ff1b28ca Merge tag 'libnvdimm-for-4.16' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm ... Browse Code »

Pull libnvdimm updates from Ross Zwisler:

- Require struct page by default for filesystem DAX to remove a number
of surprising failure cases. This includes failures with direct I/O,
gdb and fork(2).

- Add support for the new Platform Capabilities Structure added to the
NFIT in ACPI 6.2a. This new table tells us whether the platform
supports flushing of CPU and memory controller caches on unexpected
power loss events.

- Revamp vmem_altmap and dev_pagemap handling to clean up code and
better support future PCI P2P uses.

- Deprecate the ND_IOCTL_SMART_THRESHOLD command whose payload has
become out-of-sync with recent versions of the NVDIMM_FAMILY_INTEL
spec, and instead rely on the generic ND_CMD_CALL approach used by
the two other IOCTL families, NVDIMM_FAMILY_{HPE,MSFT}.

- Enhance nfit_test so we can test some of the new things added in
version 1.6 of the DSM specification. This includes testing firmware
download and simulating the Last Shutdown State (LSS) status.

* tag 'libnvdimm-for-4.16' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (37 commits)
libnvdimm, namespace: remove redundant initialization of 'nd_mapping'
acpi, nfit: fix register dimm error handling
libnvdimm, namespace: make min namespace size 4K
tools/testing/nvdimm: force nfit_test to depend on instrumented modules
libnvdimm/nfit_test: adding support for unit testing enable LSS status
libnvdimm/nfit_test: add firmware download emulation
nfit-test: Add platform cap support from ACPI 6.2a to test
libnvdimm: expose platform persistence attribute for nd_region
acpi: nfit: add persistent memory control flag for nd_region
acpi: nfit: Add support for detect platform CPU cache flush on power loss
device-dax: Fix trailing semicolon
libnvdimm, btt: fix uninitialized err_lock
dax: require 'struct page' by default for filesystem dax
ext2: auto disable dax instead of failing mount
ext4: auto disable dax instead of failing mount
mm, dax: introduce pfn_t_special()
mm: Fix devm_memremap_pages() collision handling
mm: Fix memory size alignment in devm_memremap_pages_release()
memremap: merge find_dev_pagemap into get_dev_pagemap
memremap: change devm_memremap_pages interface to use struct dev_pagemap
...

Linus Torvalds
2018-02-07 02:41:33 +0800

05 Feb, 2018

1 commit

3462ac570 Merge tag 'fscrypt_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/fscrypt ... Browse Code »

Pull fscrypt updates from Ted Ts'o:
"Refactor support for encrypted symlinks to move common code to fscrypt"

Ted also points out about the merge:
"This makes the f2fs symlink code use the fscrypt_encrypt_symlink()
from the fscrypt tree. This will end up dropping the kzalloc() ->
f2fs_kzalloc() change, which means the fscrypt-specific allocation
won't get tested by f2fs's kmalloc error injection system; which is
fine"

* tag 'fscrypt_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/fscrypt: (26 commits)
fscrypt: fix build with pre-4.6 gcc versions
fscrypt: remove 'ci' parameter from fscrypt_put_encryption_info()
fscrypt: document symlink length restriction
fscrypt: fix up fscrypt_fname_encrypted_size() for internal use
fscrypt: define fscrypt_fname_alloc_buffer() to be for presented names
fscrypt: calculate NUL-padding length in one place only
fscrypt: move fscrypt_symlink_data to fscrypt_private.h
fscrypt: remove fscrypt_fname_usr_to_disk()
ubifs: switch to fscrypt_get_symlink()
ubifs: switch to fscrypt ->symlink() helper functions
ubifs: free the encrypted symlink target
f2fs: switch to fscrypt_get_symlink()
f2fs: switch to fscrypt ->symlink() helper functions
ext4: switch to fscrypt_get_symlink()
ext4: switch to fscrypt ->symlink() helper functions
fscrypt: new helper function - fscrypt_get_symlink()
fscrypt: new helper functions for ->symlink()
fscrypt: trim down fscrypt.h includes
fscrypt: move fscrypt_is_dot_dotdot() to fs/crypto/fname.c
fscrypt: move fscrypt_valid_enc_modes() to fscrypt_private.h
...

Linus Torvalds
2018-02-05 02:43:12 +0800

04 Feb, 2018

2 commits

617aebe6a Merge tag 'usercopy-v4.16-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux ... Browse Code »

Pull hardened usercopy whitelisting from Kees Cook:
"Currently, hardened usercopy performs dynamic bounds checking on slab
cache objects. This is good, but still leaves a lot of kernel memory
available to be copied to/from userspace in the face of bugs.

To further restrict what memory is available for copying, this creates
a way to whitelist specific areas of a given slab cache object for
copying to/from userspace, allowing much finer granularity of access
control.

Slab caches that are never exposed to userspace can declare no
whitelist for their objects, thereby keeping them unavailable to
userspace via dynamic copy operations. (Note, an implicit form of
whitelisting is the use of constant sizes in usercopy operations and
get_user()/put_user(); these bypass all hardened usercopy checks since
these sizes cannot change at runtime.)

This new check is WARN-by-default, so any mistakes can be found over
the next several releases without breaking anyone's system.

The series has roughly the following sections:
- remove %p and improve reporting with offset
- prepare infrastructure and whitelist kmalloc
- update VFS subsystem with whitelists
- update SCSI subsystem with whitelists
- update network subsystem with whitelists
- update process memory with whitelists
- update per-architecture thread_struct with whitelists
- update KVM with whitelists and fix ioctl bug
- mark all other allocations as not whitelisted
- update lkdtm for more sensible test coverage"

* tag 'usercopy-v4.16-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux: (38 commits)
lkdtm: Update usercopy tests for whitelisting
usercopy: Restrict non-usercopy caches to size 0
kvm: x86: fix KVM_XEN_HVM_CONFIG ioctl
kvm: whitelist struct kvm_vcpu_arch
arm: Implement thread_struct whitelist for hardened usercopy
arm64: Implement thread_struct whitelist for hardened usercopy
x86: Implement thread_struct whitelist for hardened usercopy
fork: Provide usercopy whitelisting for task_struct
fork: Define usercopy region in thread_stack slab caches
fork: Define usercopy region in mm_struct slab caches
net: Restrict unwhitelisted proto caches to size 0
sctp: Copy struct sctp_sock.autoclose to userspace using put_user()
sctp: Define usercopy region in SCTP proto slab cache
caif: Define usercopy region in caif proto slab cache
ip: Define usercopy region in IP proto slab cache
net: Define usercopy region in struct proto slab cache
scsi: Define usercopy region in scsi_sense_cache slab cache
cifs: Define usercopy region in cifs_request slab cache
vxfs: Define usercopy region in vxfs_inode slab cache
ufs: Define usercopy region in ufs_inode_cache slab cache
...

Linus Torvalds
2018-02-04 08:25:42 +0800
23aedc4b9 Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4 ... Browse Code »

Pull ext4 updates from Ted Ts'o:
"Only miscellaneous cleanups and bug fixes for ext4 this cycle"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
ext4: create ext4_kset dynamically
ext4: create ext4_feat kobject dynamically
ext4: release kobject/kset even when init/register fail
ext4: fix incorrect indentation of if statement
ext4: correct documentation for grpid mount option
ext4: use 'sbi' instead of 'EXT4_SB(sb)'
ext4: save error to disk in __ext4_grp_locked_error()
jbd2: fix sphinx kernel-doc build warnings
ext4: fix a race in the ext4 shutdown path
mbcache: make sure c_entry_count is not decremented past zero
ext4: no need flush workqueue before destroying it
ext4: fixed alignment and minor code cleanup in ext4.h
ext4: fix ENOSPC handling in DAX page fault handler
dax: pass detailed error code from dax_iomap_fault()
mbcache: revert "fs/mbcache.c: make count_objects() more robust"
mbcache: initialize entry->e_referenced in mb_cache_entry_create()
ext4: fix up remaining files with SPDX cleanups

Linus Torvalds
2018-02-04 05:49:22 +0800

29 Jan, 2018

1 commit

ee73f9a52 ext4: convert to new i_version API ... Browse Code »

Signed-off-by: Jeff Layton
Acked-by: Theodore Ts'o

Jeff Layton
2018-01-29 19:42:21 +0800

20 Jan, 2018

1 commit

24f3478d6 ext4: auto disable dax instead of failing mount ... Browse Code »

Bring the ext4 filesystem in line with xfs that only warns and continues
when the "-o dax" option is specified to mount and the backing device
does not support dax. This is in preparation for removing dax support
from devices that do not enable get_user_pages() operations on dax
mappings. In other words 'gup' support is required and configurations
that were using so called 'page-less' dax will be converted back to
using the page cache.

Removing the broken 'page-less' dax support is a pre-requisite for
removing the "EXPERIMENTAL" warning when mounting a filesystem in dax
mode.

Reviewed-by: Jan Kara
Signed-off-by: Dan Williams

Dan Williams
2018-01-20 08:50:53 +0800

16 Jan, 2018

1 commit

f8dd7c708 ext4: Define usercopy region in ext4_inode_cache slab cache ... Browse Code »

The ext4 symlink pathnames, stored in struct ext4_inode_info.i_data
and therefore contained in the ext4_inode_cache slab cache, need
to be copied to/from userspace.

cache object allocation:
fs/ext4/super.c:
ext4_alloc_inode(...):
struct ext4_inode_info *ei;
...
ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
...
return &ei->vfs_inode;

include/trace/events/ext4.h:
#define EXT4_I(inode) \
(container_of(inode, struct ext4_inode_info, vfs_inode))

fs/ext4/namei.c:
ext4_symlink(...):
...
inode->i_link = (char *)&EXT4_I(inode)->i_data;

example usage trace:
readlink_copy+0x43/0x70
vfs_readlink+0x62/0x110
SyS_readlinkat+0x100/0x130

fs/namei.c:
readlink_copy(..., link):
...
copy_to_user(..., link, len)

(inlined into vfs_readlink)
generic_readlink(dentry, ...):
struct inode *inode = d_inode(dentry);
const char *link = inode->i_link;
...
readlink_copy(..., link);

In support of usercopy hardening, this patch defines a region in the
ext4_inode_cache slab cache in which userspace copy operations are
allowed.

This region is known as the slab cache's usercopy region. Slab caches
can now check that each dynamically sized copy operation involving
cache-managed memory falls entirely within the slab's usercopy region.

This patch is modified from Brad Spengler/PaX Team's PAX_USERCOPY
whitelisting code in the last public patch of grsecurity/PaX based on my
understanding of the code. Changes or omissions from the original code are
mine and don't reflect the original grsecurity/PaX code.

Signed-off-by: David Windsor
[kees: adjust commit log, provide usage trace]
Cc: "Theodore Ts'o"
Cc: Andreas Dilger
Cc: linux-ext4@vger.kernel.org
Signed-off-by: Kees Cook

David Windsor
2018-01-16 04:07:52 +0800

12 Jan, 2018

2 commits

3d204e24d fscrypt: remove 'ci' parameter from fscrypt_put_encryption_info() ... Browse Code »

fscrypt_put_encryption_info() is only called when evicting an inode, so
the 'struct fscrypt_info *ci' parameter is always NULL, and there cannot
be races with other threads. This was cruft left over from the broken
key revocation code. Remove the unused parameter and the cmpxchg().

Also remove the #ifdefs around the fscrypt_put_encryption_info() calls,
since fscrypt_notsupp.h defines a no-op stub for it.

Signed-off-by: Eric Biggers
Signed-off-by: Theodore Ts'o

Eric Biggers
2018-01-12 12:30:13 +0800
49598e04b ext4: use 'sbi' instead of 'EXT4_SB(sb)' ... Browse Code »

We could use 'sbi' instead of 'EXT4_SB(sb)' to make code more elegant.

Signed-off-by: Jun Piao
Signed-off-by: Theodore Ts'o
Reviewed-by: Jan Kara

Jun Piao
2018-01-12 02:17:49 +0800

10 Jan, 2018

2 commits

06f29cc81 ext4: save error to disk in __ext4_grp_locked_error() ... Browse Code »

In the function __ext4_grp_locked_error(), __save_error_info()
is called to save error info in the super block, but does not sync
that information to disk to inform the subsequent fsck after reboot.

This patch writes the error information to disk. After this patch,
I think there are no remaining obvious EXT4 error handling branches that
lead to "Remounting filesystem read-only" yet cause the disk partition
to miss the subsequent fsck.

Signed-off-by: Zhouyi Zhou
Signed-off-by: Theodore Ts'o
Cc: stable@vger.kernel.org

Zhouyi Zhou
2018-01-10 13:34:19 +0800
a90ac0f5d ext4: no need flush workqueue before destroying it ... Browse Code »

destroy_workqueue() will do flushing work for us.

Signed-off-by: Jun Piao
Signed-off-by: Theodore Ts'o
Reviewed-by: Jan Kara

piaojun
2018-01-10 10:32:41 +0800

18 Dec, 2017

1 commit

f51667685 ext4: fix up remaining files with SPDX cleanups ... Browse Code »

A number of ext4 source files were skipped because their copyright
permission statements didn't match the expected text used by the
automated conversion utilities. I've added SPDX tags for the rest.

While looking at some of these files, I've noticed that we have quite
a bit of variation on the licenses that were used --- in particular
some of the Red Hat licenses on the jbd2 files use a GPL2+ license,
and we have some files that have a LGPL-2.1 license (which was quite
surprising).

I've not attempted to do any license changes. Even if it is perfectly
legal to relicense to GPL 2.0-only for consistency's sake, that should
be done with ext4 developer community discussion.

Signed-off-by: Theodore Ts'o

Theodore Ts'o
2017-12-18 11:00:59 +0800

28 Nov, 2017

1 commit

1751e8a6c Rename superblock flags (MS_xyz -> SB_xyz) ... Browse Code »

This is a pure automated search-and-replace of the internal kernel
superblock flags.

The s_flags are now called SB_*, with the names and the values for the
moment mirroring the MS_* flags that they're equivalent to.

Note how the MS_xyz flags are the ones passed to the mount system call,
while the SB_xyz flags are what we then use in sb->s_flags.

The script to do this was:

# places to look in; re security/*: it generally should *not* be
# touched (that stuff parses mount(2) arguments directly), but
# there are two places where we really deal with superblock flags.
FILES="drivers/mtd drivers/staging/lustre fs ipc mm \
include/linux/fs.h include/uapi/linux/bfs_fs.h \
security/apparmor/apparmorfs.c security/apparmor/include/lib.h"
# the list of MS_... constants
SYMS="RDONLY NOSUID NODEV NOEXEC SYNCHRONOUS REMOUNT MANDLOCK \
DIRSYNC NOATIME NODIRATIME BIND MOVE REC VERBOSE SILENT \
POSIXACL UNBINDABLE PRIVATE SLAVE SHARED RELATIME KERNMOUNT \
I_VERSION STRICTATIME LAZYTIME SUBMOUNT NOREMOTELOCK NOSEC BORN \
ACTIVE NOUSER"

SED_PROG=
for i in $SYMS; do SED_PROG="$SED_PROG -e s/MS_$i/SB_$i/g"; done

# we want files that contain at least one of MS_...,
# with fs/namespace.c and fs/pnode.c excluded.
L=$(for i in $SYMS; do git grep -w -l MS_$i $FILES; done| sort|uniq|grep -v '^fs/namespace.c'|grep -v '^fs/pnode.c')

for f in $L; do sed -i $f $SED_PROG; done

Requested-by: Al Viro
Signed-off-by: Linus Torvalds

Linus Torvalds
2017-11-28 05:05:09 +0800

15 Nov, 2017

2 commits

ae9a8c4bd Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4 ... Browse Code »

Pull ext4 updates from Ted Ts'o:

- Add support for online resizing of file systems with bigalloc

- Fix two data corruption bugs involving DAX, as well as a corruption
bug after a crash during a racing fallocate and delayed allocation.

- Finally, a number of cleanups and optimizations.

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
ext4: improve smp scalability for inode generation
ext4: add support for online resizing with bigalloc
ext4: mention noload when recovering on read-only device
Documentation: fix little inconsistencies
ext4: convert timers to use timer_setup()
jbd2: convert timers to use timer_setup()
ext4: remove duplicate extended attributes defs
ext4: add ext4_should_use_dax()
ext4: add sanity check for encryption + DAX
ext4: prevent data corruption with journaling + DAX
ext4: prevent data corruption with inline data + DAX
ext4: fix interaction between i_size, fallocate, and delalloc after a crash
ext4: retry allocations conservatively
ext4: Switch to iomap for SEEK_HOLE / SEEK_DATA
ext4: Add iomap support for inline data
iomap: Add IOMAP_F_DATA_INLINE flag
iomap: Switch from blkno to disk offset

Linus Torvalds
2017-11-15 04:59:42 +0800
32190f0af Merge tag 'fscrypt-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/fscrypt ... Browse Code »

Pull fscrypt updates from Ted Ts'o:
"Lots of cleanups, mostly courtesy of Eric Biggers"

* tag 'fscrypt-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/fscrypt:
fscrypt: lock mutex before checking for bounce page pool
fscrypt: add a documentation file for filesystem-level encryption
ext4: switch to fscrypt_prepare_setattr()
ext4: switch to fscrypt_prepare_lookup()
ext4: switch to fscrypt_prepare_rename()
ext4: switch to fscrypt_prepare_link()
ext4: switch to fscrypt_file_open()
fscrypt: new helper function - fscrypt_prepare_setattr()
fscrypt: new helper function - fscrypt_prepare_lookup()
fscrypt: new helper function - fscrypt_prepare_rename()
fscrypt: new helper function - fscrypt_prepare_link()
fscrypt: new helper function - fscrypt_file_open()
fscrypt: new helper function - fscrypt_require_key()
fscrypt: remove unneeded empty fscrypt_operations structs
fscrypt: remove ->is_encrypted()
fscrypt: switch from ->is_encrypted() to IS_ENCRYPTED()
fs, fscrypt: add an S_ENCRYPTED inode flag
fscrypt: clean up include file mess

Linus Torvalds
2017-11-15 03:35:15 +0800

09 Nov, 2017

1 commit

232530680 ext4: improve smp scalability for inode generation ... Browse Code »

->s_next_generation is protected by s_next_gen_lock but its usage
pattern is very primitive. We don't actually need sequentially
increasing new generation numbers, so let's use prandom_u32() instead.

Reported-by: Dmitry Monakhov
Signed-off-by: Theodore Ts'o

Theodore Ts'o
2017-11-09 11:23:20 +0800

19 Oct, 2017

6 commits

ffcc41829 fscrypt: remove unneeded empty fscrypt_operations structs ... Browse Code »

In the case where a filesystem has been configured without encryption
support, there is no longer any need to initialize ->s_cop at all, since
none of the methods are ever called.

Reviewed-by: Chao Yu
Acked-by: Dave Chinner
Signed-off-by: Eric Biggers
Signed-off-by: Theodore Ts'o

Eric Biggers
2017-10-19 07:52:37 +0800
f7293e48b fscrypt: remove ->is_encrypted() ... Browse Code »

Now that all callers of fscrypt_operations.is_encrypted() have been
switched to IS_ENCRYPTED(), remove ->is_encrypted().

Reviewed-by: Chao Yu
Acked-by: Dave Chinner
Signed-off-by: Eric Biggers
Signed-off-by: Theodore Ts'o

Eric Biggers
2017-10-19 07:52:37 +0800
2ee6a576b fs, fscrypt: add an S_ENCRYPTED inode flag ... Browse Code »

Introduce a flag S_ENCRYPTED which can be set in ->i_flags to indicate
that the inode is encrypted using the fscrypt (fs/crypto/) mechanism.

Checking this flag will give the same information that
inode->i_sb->s_cop->is_encrypted(inode) currently does, but will be more
efficient. This will be useful for adding higher-level helper functions
for filesystems to use. For example we'll be able to replace this:

if (ext4_encrypted_inode(inode)) {
ret = fscrypt_get_encryption_info(inode);
if (ret)
return ret;
if (!fscrypt_has_encryption_key(inode))
return -ENOKEY;
}

with this:

ret = fscrypt_require_key(inode);
if (ret)
return ret;

... since we'll be able to retain the fast path for unencrypted files as
a single flag check, using an inline function. This wasn't possible
before because we'd have had to frequently call through the
->i_sb->s_cop->is_encrypted function pointer, even when the encryption
support was disabled or not being used.

Note: we don't define S_ENCRYPTED to 0 if CONFIG_FS_ENCRYPTION is
disabled because we want to continue to return an error if an encrypted
file is accessed without encryption support, rather than pretending that
it is unencrypted.

Reviewed-by: Chao Yu
Acked-by: Dave Chinner
Signed-off-by: Eric Biggers
Signed-off-by: Theodore Ts'o

Eric Biggers
2017-10-19 07:52:36 +0800
357fdad07 Convert fs/*/* to SB_I_VERSION ... Browse Code »

[AV: in addition to the fix in previous commit]

Signed-off-by: Matthew Garrett
Cc: David Howells
Cc: Alexander Viro
Reviewed-by: David Howells
Signed-off-by: Al Viro

Matthew Garrett
2017-10-19 06:51:27 +0800
d98bf8cd1 ext4: mention noload when recovering on read-only device ... Browse Code »

Help the user to find the appropriate mount option to continue mounting
the file system on a read-only device if the journal requires recovery.

Signed-off-by: Simon Ruderich
Signed-off-by: Theodore Ts'o

Simon Ruderich
2017-10-19 01:06:37 +0800
235699a8f ext4: convert timers to use timer_setup() ... Browse Code »

In preparation for unconditionally passing the struct timer_list pointer to
all timer callbacks, switch to using the new timer_setup() and from_timer()
to pass the timer pointer explicitly.

Signed-off-by: Kees Cook
Signed-off-by: Theodore Ts'o
Reviewed-by: Jan Kara
Cc: Andreas Dilger
Cc: linux-ext4@vger.kernel.org

Kees Cook
2017-10-19 00:45:17 +0800