13 Jun, 2018

1 commit

  • The kzalloc() function has a 2-factor argument form, kcalloc(). This
    patch replaces cases of:

    kzalloc(a * b, gfp)

    with:
    kcalloc(a, b, gfp)

    as well as handling cases of:

    kzalloc(a * b * c, gfp)

    with:

    kzalloc(array3_size(a, b, c), gfp)

    as it's slightly less ugly than:

    kcalloc(array_size(a, b), c, gfp)

    This does, however, attempt to ignore constant size factors like:

    kzalloc(4 * 1024, gfp)

    though any constants defined via macros get caught up in the conversion.

    Any factors with a sizeof() of "unsigned char", "char", and "u8" were
    dropped, since they're redundant.

    The Coccinelle script used for this was:

    // Fix redundant parens around sizeof().
    @@
    type TYPE;
    expression THING, E;
    @@

    (
    kzalloc(
    - (sizeof(TYPE)) * E
    + sizeof(TYPE) * E
    , ...)
    |
    kzalloc(
    - (sizeof(THING)) * E
    + sizeof(THING) * E
    , ...)
    )

    // Drop single-byte sizes and redundant parens.
    @@
    expression COUNT;
    typedef u8;
    typedef __u8;
    @@

    (
    kzalloc(
    - sizeof(u8) * (COUNT)
    + COUNT
    , ...)
    |
    kzalloc(
    - sizeof(__u8) * (COUNT)
    + COUNT
    , ...)
    |
    kzalloc(
    - sizeof(char) * (COUNT)
    + COUNT
    , ...)
    |
    kzalloc(
    - sizeof(unsigned char) * (COUNT)
    + COUNT
    , ...)
    |
    kzalloc(
    - sizeof(u8) * COUNT
    + COUNT
    , ...)
    |
    kzalloc(
    - sizeof(__u8) * COUNT
    + COUNT
    , ...)
    |
    kzalloc(
    - sizeof(char) * COUNT
    + COUNT
    , ...)
    |
    kzalloc(
    - sizeof(unsigned char) * COUNT
    + COUNT
    , ...)
    )

    // 2-factor product with sizeof(type/expression) and identifier or constant.
    @@
    type TYPE;
    expression THING;
    identifier COUNT_ID;
    constant COUNT_CONST;
    @@

    (
    - kzalloc
    + kcalloc
    (
    - sizeof(TYPE) * (COUNT_ID)
    + COUNT_ID, sizeof(TYPE)
    , ...)
    |
    - kzalloc
    + kcalloc
    (
    - sizeof(TYPE) * COUNT_ID
    + COUNT_ID, sizeof(TYPE)
    , ...)
    |
    - kzalloc
    + kcalloc
    (
    - sizeof(TYPE) * (COUNT_CONST)
    + COUNT_CONST, sizeof(TYPE)
    , ...)
    |
    - kzalloc
    + kcalloc
    (
    - sizeof(TYPE) * COUNT_CONST
    + COUNT_CONST, sizeof(TYPE)
    , ...)
    |
    - kzalloc
    + kcalloc
    (
    - sizeof(THING) * (COUNT_ID)
    + COUNT_ID, sizeof(THING)
    , ...)
    |
    - kzalloc
    + kcalloc
    (
    - sizeof(THING) * COUNT_ID
    + COUNT_ID, sizeof(THING)
    , ...)
    |
    - kzalloc
    + kcalloc
    (
    - sizeof(THING) * (COUNT_CONST)
    + COUNT_CONST, sizeof(THING)
    , ...)
    |
    - kzalloc
    + kcalloc
    (
    - sizeof(THING) * COUNT_CONST
    + COUNT_CONST, sizeof(THING)
    , ...)
    )

    // 2-factor product, only identifiers.
    @@
    identifier SIZE, COUNT;
    @@

    - kzalloc
    + kcalloc
    (
    - SIZE * COUNT
    + COUNT, SIZE
    , ...)

    // 3-factor product with 1 sizeof(type) or sizeof(expression), with
    // redundant parens removed.
    @@
    expression THING;
    identifier STRIDE, COUNT;
    type TYPE;
    @@

    (
    kzalloc(
    - sizeof(TYPE) * (COUNT) * (STRIDE)
    + array3_size(COUNT, STRIDE, sizeof(TYPE))
    , ...)
    |
    kzalloc(
    - sizeof(TYPE) * (COUNT) * STRIDE
    + array3_size(COUNT, STRIDE, sizeof(TYPE))
    , ...)
    |
    kzalloc(
    - sizeof(TYPE) * COUNT * (STRIDE)
    + array3_size(COUNT, STRIDE, sizeof(TYPE))
    , ...)
    |
    kzalloc(
    - sizeof(TYPE) * COUNT * STRIDE
    + array3_size(COUNT, STRIDE, sizeof(TYPE))
    , ...)
    |
    kzalloc(
    - sizeof(THING) * (COUNT) * (STRIDE)
    + array3_size(COUNT, STRIDE, sizeof(THING))
    , ...)
    |
    kzalloc(
    - sizeof(THING) * (COUNT) * STRIDE
    + array3_size(COUNT, STRIDE, sizeof(THING))
    , ...)
    |
    kzalloc(
    - sizeof(THING) * COUNT * (STRIDE)
    + array3_size(COUNT, STRIDE, sizeof(THING))
    , ...)
    |
    kzalloc(
    - sizeof(THING) * COUNT * STRIDE
    + array3_size(COUNT, STRIDE, sizeof(THING))
    , ...)
    )

    // 3-factor product with 2 sizeof(variable), with redundant parens removed.
    @@
    expression THING1, THING2;
    identifier COUNT;
    type TYPE1, TYPE2;
    @@

    (
    kzalloc(
    - sizeof(TYPE1) * sizeof(TYPE2) * COUNT
    + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
    , ...)
    |
    kzalloc(
    - sizeof(TYPE1) * sizeof(TYPE2) * (COUNT)
    + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
    , ...)
    |
    kzalloc(
    - sizeof(THING1) * sizeof(THING2) * COUNT
    + array3_size(COUNT, sizeof(THING1), sizeof(THING2))
    , ...)
    |
    kzalloc(
    - sizeof(THING1) * sizeof(THING2) * (COUNT)
    + array3_size(COUNT, sizeof(THING1), sizeof(THING2))
    , ...)
    |
    kzalloc(
    - sizeof(TYPE1) * sizeof(THING2) * COUNT
    + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
    , ...)
    |
    kzalloc(
    - sizeof(TYPE1) * sizeof(THING2) * (COUNT)
    + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
    , ...)
    )

    // 3-factor product, only identifiers, with redundant parens removed.
    @@
    identifier STRIDE, SIZE, COUNT;
    @@

    (
    kzalloc(
    - (COUNT) * STRIDE * SIZE
    + array3_size(COUNT, STRIDE, SIZE)
    , ...)
    |
    kzalloc(
    - COUNT * (STRIDE) * SIZE
    + array3_size(COUNT, STRIDE, SIZE)
    , ...)
    |
    kzalloc(
    - COUNT * STRIDE * (SIZE)
    + array3_size(COUNT, STRIDE, SIZE)
    , ...)
    |
    kzalloc(
    - (COUNT) * (STRIDE) * SIZE
    + array3_size(COUNT, STRIDE, SIZE)
    , ...)
    |
    kzalloc(
    - COUNT * (STRIDE) * (SIZE)
    + array3_size(COUNT, STRIDE, SIZE)
    , ...)
    |
    kzalloc(
    - (COUNT) * STRIDE * (SIZE)
    + array3_size(COUNT, STRIDE, SIZE)
    , ...)
    |
    kzalloc(
    - (COUNT) * (STRIDE) * (SIZE)
    + array3_size(COUNT, STRIDE, SIZE)
    , ...)
    |
    kzalloc(
    - COUNT * STRIDE * SIZE
    + array3_size(COUNT, STRIDE, SIZE)
    , ...)
    )

    // Any remaining multi-factor products, first at least 3-factor products,
    // when they're not all constants...
    @@
    expression E1, E2, E3;
    constant C1, C2, C3;
    @@

    (
    kzalloc(C1 * C2 * C3, ...)
    |
    kzalloc(
    - (E1) * E2 * E3
    + array3_size(E1, E2, E3)
    , ...)
    |
    kzalloc(
    - (E1) * (E2) * E3
    + array3_size(E1, E2, E3)
    , ...)
    |
    kzalloc(
    - (E1) * (E2) * (E3)
    + array3_size(E1, E2, E3)
    , ...)
    |
    kzalloc(
    - E1 * E2 * E3
    + array3_size(E1, E2, E3)
    , ...)
    )

    // And then all remaining 2 factors products when they're not all constants,
    // keeping sizeof() as the second factor argument.
    @@
    expression THING, E1, E2;
    type TYPE;
    constant C1, C2, C3;
    @@

    (
    kzalloc(sizeof(THING) * C2, ...)
    |
    kzalloc(sizeof(TYPE) * C2, ...)
    |
    kzalloc(C1 * C2 * C3, ...)
    |
    kzalloc(C1 * C2, ...)
    |
    - kzalloc
    + kcalloc
    (
    - sizeof(TYPE) * (E2)
    + E2, sizeof(TYPE)
    , ...)
    |
    - kzalloc
    + kcalloc
    (
    - sizeof(TYPE) * E2
    + E2, sizeof(TYPE)
    , ...)
    |
    - kzalloc
    + kcalloc
    (
    - sizeof(THING) * (E2)
    + E2, sizeof(THING)
    , ...)
    |
    - kzalloc
    + kcalloc
    (
    - sizeof(THING) * E2
    + E2, sizeof(THING)
    , ...)
    |
    - kzalloc
    + kcalloc
    (
    - (E1) * E2
    + E1, E2
    , ...)
    |
    - kzalloc
    + kcalloc
    (
    - (E1) * (E2)
    + E1, E2
    , ...)
    |
    - kzalloc
    + kcalloc
    (
    - E1 * E2
    + E1, E2
    , ...)
    )

    Signed-off-by: Kees Cook

    Kees Cook
     

05 Jun, 2018

1 commit

  • Pull aio updates from Al Viro:
    "Majority of AIO stuff this cycle. aio-fsync and aio-poll, mostly.

    The only thing I'm holding back for a day or so is Adam's aio ioprio -
    his last-minute fixup is trivial (missing stub in !CONFIG_BLOCK case),
    but let it sit in -next for decency sake..."

    * 'work.aio-1' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (46 commits)
    aio: sanitize the limit checking in io_submit(2)
    aio: fold do_io_submit() into callers
    aio: shift copyin of iocb into io_submit_one()
    aio_read_events_ring(): make a bit more readable
    aio: all callers of aio_{read,write,fsync,poll} treat 0 and -EIOCBQUEUED the same way
    aio: take list removal to (some) callers of aio_complete()
    aio: add missing break for the IOCB_CMD_FDSYNC case
    random: convert to ->poll_mask
    timerfd: convert to ->poll_mask
    eventfd: switch to ->poll_mask
    pipe: convert to ->poll_mask
    crypto: af_alg: convert to ->poll_mask
    net/rxrpc: convert to ->poll_mask
    net/iucv: convert to ->poll_mask
    net/phonet: convert to ->poll_mask
    net/nfc: convert to ->poll_mask
    net/caif: convert to ->poll_mask
    net/bluetooth: convert to ->poll_mask
    net/sctp: convert to ->poll_mask
    net/tipc: convert to ->poll_mask
    ...

    Linus Torvalds
     

26 May, 2018

1 commit


24 Apr, 2018

1 commit

  • This patch initializes stack variables which are used in
    frag_lowpan_compare_key to zero. In my case there are padding bytes in the
    structures ieee802154_addr as well as in frag_lowpan_compare_key. Otherwise
    the key variable contains random bytes. The result is that a comparison of
    two keys by memcmp works incorrectly.

    Fixes: 648700f76b03 ("inet: frags: use rhashtables for reassembly units")
    Signed-off-by: Alexander Aring
    Reported-by: Stefan Schmidt
    Signed-off-by: Stefan Schmidt

    Alexander Aring
     

05 Apr, 2018

1 commit

  • Giving an integer to proc_doulongvec_minmax() is dangerous on 64bit arches,
    since linker might place next to it a non zero value preventing a change
    to ip6frag_low_thresh.

    ip6frag_low_thresh is not used anymore in the kernel, but we do not
    want to prematurely break user scripts wanting to change it.

    Since specifying a minimal value of 0 for proc_doulongvec_minmax()
    is moot, let's remove these zero values in all defrag units.

    Fixes: 6e00f7dd5e4e ("ipv6: frags: fix /proc/sys/net/ipv6/ip6frag_low_thresh")
    Signed-off-by: Eric Dumazet
    Reported-by: Maciej Żenczykowski
    Signed-off-by: David S. Miller

    Eric Dumazet
     

01 Apr, 2018

6 commits

  • Some users are willing to provision huge amounts of memory to be able
    to perform reassembly reasonably well under pressure.

    Current memory tracking is using one atomic_t and integers.

    Switch to atomic_long_t so that 64bit arches can use more than 2GB,
    without any cost for 32bit arches.

    Note that this patch avoids an overflow error, if high_thresh was set
    to ~2GB, since this test in inet_frag_alloc() was never true :

    if (... || frag_mem_limit(nf) > nf->high_thresh)

    Tested:

    $ echo 16000000000 >/proc/sys/net/ipv4/ipfrag_high_thresh

    $ grep FRAG /proc/net/sockstat
    FRAG: inuse 14705885 memory 16000002880

    $ nstat -n ; sleep 1 ; nstat | grep Reas
    IpReasmReqds 3317150 0.0
    IpReasmFails 3317112 0.0

    Signed-off-by: Eric Dumazet
    Signed-off-by: David S. Miller

    Eric Dumazet
     
  • This function is obsolete, after rhashtable addition to inet defrag.

    Signed-off-by: Eric Dumazet
    Signed-off-by: David S. Miller

    Eric Dumazet
     
  • Some applications still rely on IP fragmentation, and to be fair linux
    reassembly unit is not working under any serious load.

    It uses static hash tables of 1024 buckets, and up to 128 items per bucket (!!!)

    A work queue is supposed to garbage collect items when host is under memory
    pressure, and doing a hash rebuild, changing seed used in hash computations.

    This work queue blocks softirqs for up to 25 ms when doing a hash rebuild,
    occurring every 5 seconds if host is under fire.

    Then there is the problem of sharing this hash table for all netns.

    It is time to switch to rhashtables, and allocate one of them per netns
    to speedup netns dismantle, since this is a critical metric these days.

    Lookup is now using RCU. A followup patch will even remove
    the refcount hold/release left from prior implementation and save
    a couple of atomic operations.

    Before this patch, 16 cpus (16 RX queue NIC) could not handle more
    than 1 Mpps frags DDOS.

    After the patch, I reach 9 Mpps without any tuning, and can use up to 2GB
    of storage for the fragments (exact number depends on frags being evicted
    after timeout)

    $ grep FRAG /proc/net/sockstat
    FRAG: inuse 1966916 memory 2140004608

    A followup patch will change the limits for 64bit arches.

    Signed-off-by: Eric Dumazet
    Cc: Kirill Tkhai
    Cc: Herbert Xu
    Cc: Florian Westphal
    Cc: Jesper Dangaard Brouer
    Cc: Alexander Aring
    Cc: Stefan Schmidt
    Signed-off-by: David S. Miller

    Eric Dumazet
     
  • We want to call lowpan_net_frag_init() earlier.
    Similar to commit "inet: frags: refactor ipv6_frag_init()"

    This is a prereq to "inet: frags: use rhashtables for reassembly units"

    Signed-off-by: Eric Dumazet
    Signed-off-by: David S. Miller

    Eric Dumazet
     
  • In order to simplify the API, add a pointer to struct inet_frags.
    This will allow us to make things less complex.

    These functions no longer have a struct inet_frags parameter :

    inet_frag_destroy(struct inet_frag_queue *q /*, struct inet_frags *f */)
    inet_frag_put(struct inet_frag_queue *q /*, struct inet_frags *f */)
    inet_frag_kill(struct inet_frag_queue *q /*, struct inet_frags *f */)
    inet_frags_exit_net(struct netns_frags *nf /*, struct inet_frags *f */)
    ip6_expire_frag_queue(struct net *net, struct frag_queue *fq)

    Signed-off-by: Eric Dumazet
    Signed-off-by: David S. Miller

    Eric Dumazet
     
  • We will soon initialize one rhashtable per struct netns_frags
    in inet_frags_init_net().

    This patch changes the return value to eventually propagate an
    error.

    Signed-off-by: Eric Dumazet
    Signed-off-by: David S. Miller

    Eric Dumazet
     

28 Mar, 2018

1 commit


23 Mar, 2018

1 commit

  • Fun set of conflict resolutions here...

    For the mac80211 stuff, these were fortunately just parallel
    adds. Trivially resolved.

    In drivers/net/phy/phy.c we had a bug fix in 'net' that moved the
    function phy_disable_interrupts() earlier in the file, whilst in
    'net-next' the phy_error() call from this function was removed.

    In net/ipv4/xfrm4_policy.c, David Ahern's changes to remove the
    'rt_table_id' member of rtable collided with a bug fix in 'net' that
    added a new struct member "rt_mtu_locked" which needs to be copied
    over here.

    The mlxsw driver conflict consisted of net-next separating
    the span code and definitions into separate files, whilst
    a 'net' bug fix made some changes to that moved code.

    The mlx5 infiniband conflict resolution was quite non-trivial,
    the RDMA tree's merge commit was used as a guide here, and
    here are their notes:

    ====================

    Due to bug fixes found by the syzkaller bot and taken into the for-rc
    branch after development for the 4.17 merge window had already started
    being taken into the for-next branch, there were fairly non-trivial
    merge issues that would need to be resolved between the for-rc branch
    and the for-next branch. This merge resolves those conflicts and
    provides a unified base upon which ongoing development for 4.17 can
    be based.

    Conflicts:
    drivers/infiniband/hw/mlx5/main.c - Commit 42cea83f9524
    (IB/mlx5: Fix cleanup order on unload) added to for-rc and
    commit b5ca15ad7e61 (IB/mlx5: Add proper representors support)
    add as part of the devel cycle both needed to modify the
    init/de-init functions used by mlx5. To support the new
    representors, the new functions added by the cleanup patch
    needed to be made non-static, and the init/de-init list
    added by the representors patch needed to be modified to
    match the init/de-init list changes made by the cleanup
    patch.
    Updates:
    drivers/infiniband/hw/mlx5/mlx5_ib.h - Update function
    prototypes added by representors patch to reflect new function
    names as changed by cleanup patch
    drivers/infiniband/hw/mlx5/ib_rep.c - Update init/de-init
    stage list to match new order from cleanup patch
    ====================

    Signed-off-by: David S. Miller

    David S. Miller
     

22 Mar, 2018

1 commit

  • These pernet_operations register and unregister sysctl.
    Also, there is inet_frags_exit_net() called in exit method,
    which has to be safe after a560002437d3 "net: Fix hlist
    corruptions in inet_evict_bucket()".

    Signed-off-by: Kirill Tkhai
    Signed-off-by: David S. Miller

    Kirill Tkhai
     

10 Mar, 2018

1 commit

  • A tun device type can trivially be set to arbitrary value using
    TUNSETLINK ioctl().

    Therefore, lowpan_device_event() must really check that ieee802154_ptr
    is not NULL.

    Fixes: 2c88b5283f60d ("ieee802154: 6lowpan: remove check on null")
    Signed-off-by: Eric Dumazet
    Cc: Alexander Aring
    Cc: Stefan Schmidt
    Reported-by: syzbot
    Acked-by: Stefan Schmidt
    Signed-off-by: David S. Miller

    Eric Dumazet
     

28 Feb, 2018

2 commits

  • …inux/kernel/git/sschmidt/wpan-next

    Stefan Schmidt says:

    ====================
    pull-request: ieee802154-next 2018-02-26

    An update from ieee802154 for *net-next*

    Alexander corrected a setting which got lost during some 6lowpan rework
    a while back and Xue Liu provided us with a new driver for the MCR20A
    transceiver.

    If there are any issues let me know. If not, please pull.
    ====================

    Signed-off-by: David S. Miller <davem@davemloft.net>

    David S. Miller
     
  • These pernet_operations have only exit method, which
    moves devices from cfg802154_rdev_list to init_net.
    This may occur in any time from nl802154_wpan_phy_netns(),
    so we are nice with rtnl_lock() synchronization.

    Signed-off-by: Kirill Tkhai
    Acked-by: Stefan Schmidt
    Signed-off-by: David S. Miller

    Kirill Tkhai
     

14 Feb, 2018

1 commit

  • This patch sets the IFF_NO_QUEUE for IEEE 802.15.4 6lowpan interfaces. As
    commit 24dcbf662205 ("6lowpan: Don't set IFF_NO_QUEUE") removes it for
    "reasons" from the bluetooth 6lowpan subsystem. In IEEE 802.15.4 the lower
    interface deals with one qdisc for the real hardware, 6LoWPAN does the
    protocol adaption only and no second queuing on top.

    Signed-off-by: Alexander Aring
    Signed-off-by: Stefan Schmidt

    Alexander Aring
     

04 Nov, 2017

1 commit


02 Nov, 2017

1 commit

  • Many source files in the tree are missing licensing information, which
    makes it harder for compliance tools to determine the correct license.

    By default all files without license information are under the default
    license of the kernel, which is GPL version 2.

    Update the files which contain no license information with the 'GPL-2.0'
    SPDX license identifier. The SPDX identifier is a legally binding
    shorthand, which can be used instead of the full boiler plate text.

    This patch is based on work done by Thomas Gleixner and Kate Stewart and
    Philippe Ombredanne.

    How this work was done:

    Patches were generated and checked against linux-4.14-rc6 for a subset of
    the use cases:
    - file had no licensing information in it,
    - file was a */uapi/* one with no licensing information in it,
    - file was a */uapi/* one with existing licensing information,

    Further patches will be generated in subsequent months to fix up cases
    where non-standard license headers were used, and references to license
    had to be inferred by heuristics based on keywords.

    The analysis to determine which SPDX License Identifier to be applied to
    a file was done in a spreadsheet of side by side results from the
    output of two independent scanners (ScanCode & Windriver) producing SPDX
    tag:value files created by Philippe Ombredanne. Philippe prepared the
    base worksheet, and did an initial spot review of a few 1000 files.

    The 4.13 kernel was the starting point of the analysis with 60,537 files
    assessed. Kate Stewart did a file by file comparison of the scanner
    results in the spreadsheet to determine which SPDX license identifier(s)
    to be applied to the file. She confirmed any determination that was not
    immediately clear with lawyers working with the Linux Foundation.

    Criteria used to select files for SPDX license identifier tagging was:
    - Files considered eligible had to be source code files.
    - Make and config files were included as candidates if they contained >5
    lines of source
    - File already had some variant of a license header in it (even if
    Reviewed-by: Philippe Ombredanne
    Reviewed-by: Thomas Gleixner
    Signed-off-by: Greg Kroah-Hartman

    Greg Kroah-Hartman
     

18 Oct, 2017

2 commits

  • Stefan Schmidt
     
  • In preparation for unconditionally passing the struct timer_list pointer to
    all timer callbacks, switch to using the new timer_setup() and from_timer()
    to pass the timer pointer explicitly.

    Cc: Alexander Aring
    Cc: Stefan Schmidt
    Cc: "David S. Miller"
    Cc: Alexey Kuznetsov
    Cc: Hideaki YOSHIFUJI
    Cc: Pablo Neira Ayuso
    Cc: Jozsef Kadlecsik
    Cc: Florian Westphal
    Cc: linux-wpan@vger.kernel.org
    Cc: netdev@vger.kernel.org
    Cc: netfilter-devel@vger.kernel.org
    Cc: coreteam@netfilter.org
    Signed-off-by: Kees Cook
    Acked-by: Stefan Schmidt # for ieee802154
    Signed-off-by: David S. Miller

    Kees Cook
     

17 Oct, 2017

1 commit


06 Sep, 2017

1 commit


04 Sep, 2017

1 commit

  • This reverts commit 1d6119baf0610f813eb9d9580eb4fd16de5b4ceb.

    After reverting commit 6d7b857d541e ("net: use lib/percpu_counter API
    for fragmentation mem accounting") there is no need for this
    fix-up patch. As percpu_counter is no longer used, it can no
    longer leak memory.

    Fixes: 6d7b857d541e ("net: use lib/percpu_counter API for fragmentation mem accounting")
    Fixes: 1d6119baf061 ("net: fix percpu memory leaks")
    Signed-off-by: Jesper Dangaard Brouer
    Signed-off-by: David S. Miller

    Jesper Dangaard Brouer
     

26 Aug, 2017

1 commit


27 Jun, 2017

2 commits


16 Jun, 2017

1 commit

  • A common pattern with skb_put() is to just want to memcpy()
    some data into the new space, introduce skb_put_data() for
    this.

    An spatch similar to the one for skb_put_zero() converts many
    of the places using it:

    @@
    identifier p, p2;
    expression len, skb, data;
    type t, t2;
    @@
    (
    -p = skb_put(skb, len);
    +p = skb_put_data(skb, data, len);
    |
    -p = (t)skb_put(skb, len);
    +p = skb_put_data(skb, data, len);
    )
    (
    p2 = (t2)p;
    -memcpy(p2, data, len);
    |
    -memcpy(p, data, len);
    )

    @@
    type t, t2;
    identifier p, p2;
    expression skb, data;
    @@
    t *p;
    ...
    (
    -p = skb_put(skb, sizeof(t));
    +p = skb_put_data(skb, data, sizeof(t));
    |
    -p = (t *)skb_put(skb, sizeof(t));
    +p = skb_put_data(skb, data, sizeof(t));
    )
    (
    p2 = (t2)p;
    -memcpy(p2, data, sizeof(*p));
    |
    -memcpy(p, data, sizeof(*p));
    )

    @@
    expression skb, len, data;
    @@
    -memcpy(skb_put(skb, len), data, len);
    +skb_put_data(skb, data, len);

    (again, manually post-processed to retain some comments)

    Reviewed-by: Stephen Hemminger
    Signed-off-by: Johannes Berg
    Signed-off-by: David S. Miller

    Johannes Berg
     

15 Jun, 2017

1 commit


08 Jun, 2017

1 commit

  • Network devices can allocate resources and private memory using
    netdev_ops->ndo_init(). However, the release of these resources
    can occur in one of two different places.

    Either netdev_ops->ndo_uninit() or netdev->destructor().

    The decision of which operation frees the resources depends upon
    whether it is necessary for all netdev refs to be released before it
    is safe to perform the freeing.

    netdev_ops->ndo_uninit() presumably can occur right after the
    NETDEV_UNREGISTER notifier completes and the unicast and multicast
    address lists are flushed.

    netdev->destructor(), on the other hand, does not run until the
    netdev references all go away.

    Further complicating the situation is that netdev->destructor()
    almost universally does also a free_netdev().

    This creates a problem for the logic in register_netdevice().
    Because all callers of register_netdevice() manage the freeing
    of the netdev, and invoke free_netdev(dev) if register_netdevice()
    fails.

    If netdev_ops->ndo_init() succeeds, but something else fails inside
    of register_netdevice(), it does call ndo_ops->ndo_uninit(). But
    it is not able to invoke netdev->destructor().

    This is because netdev->destructor() will do a free_netdev() and
    then the caller of register_netdevice() will do the same.

    However, this means that the resources that would normally be released
    by netdev->destructor() will not be.

    Over the years drivers have added local hacks to deal with this, by
    invoking their destructor parts by hand when register_netdevice()
    fails.

    Many drivers do not try to deal with this, and instead we have leaks.

    Let's close this hole by formalizing the distinction between what
    private things need to be freed up by netdev->destructor() and whether
    the driver needs unregister_netdevice() to perform the free_netdev().

    netdev->priv_destructor() performs all actions to free up the private
    resources that used to be freed by netdev->destructor(), except for
    free_netdev().

    netdev->needs_free_netdev is a boolean that indicates whether
    free_netdev() should be done at the end of unregister_netdevice().

    Now, register_netdevice() can sanely release all resources after
    ndo_ops->ndo_init() succeeds, by invoking both ndo_ops->ndo_uninit()
    and netdev->priv_destructor().

    And at the end of unregister_netdevice(), we invoke
    netdev->priv_destructor() and optionally call free_netdev().

    Signed-off-by: David S. Miller

    David S. Miller
     

24 May, 2017

2 commits

  • This patch fixes a kernel oops caused by releasing the net_device reference
    too early. In function raw_sendmsg (I think dgram_sendmsg has the same
    problem), there is a race condition between dev_put and dev_queue_xmit
    when the device is gone, which may lead dev_queue_xmit to see
    an illegal net_device pointer.

    My test kernel is 3.13.0-32 and, because I do not have a real 802154
    device, I changed the lowpan_newlink function to this:

    /* find and hold real wpan device */
    real_dev = dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK]));
    if (!real_dev)
    return -ENODEV;
    // if (real_dev->type != ARPHRD_IEEE802154) {
    // dev_put(real_dev);
    // return -EINVAL;
    // }
    lowpan_dev_info(dev)->real_dev = real_dev;
    lowpan_dev_info(dev)->fragment_tag = 0;
    mutex_init(&lowpan_dev_info(dev)->dev_list_mtx);

    Also, in order to simulate preemption, I changed the raw_sendmsg function
    to this:

    skb->dev = dev;
    skb->sk = sk;
    skb->protocol = htons(ETH_P_IEEE802154);
    dev_put(dev);
    //simulate preempt
    schedule_timeout_uninterruptible(30 * HZ);
    err = dev_queue_xmit(skb);
    if (err > 0)
    err = net_xmit_errno(err);

    and this is my userspace test code named test_send_data:

    int main(int argc, char **argv)
    {
    char buf[127];
    int sockfd;
    sockfd = socket(AF_IEEE802154, SOCK_RAW, 0);
    if (sockfd < 0) {
    printf("create sockfd error: %s\n", strerror(errno));
    return -1;
    }
    send(sockfd, buf, sizeof(buf), 0);
    return 0;
    }

    This is my test case:

    root@zhanglin-x-computer:~/develop/802154# uname -a
    Linux zhanglin-x-computer 3.13.0-32-generic #57-Ubuntu SMP Tue Jul 15
    03:51:08 UTC 2014 x86_64 x86_64 x86_64 GNU/Linux
    root@zhanglin-x-computer:~/develop/802154# ip link add link eth0 name
    lowpan0 type lowpan
    root@zhanglin-x-computer:~/develop/802154#
    //keep the lowpan0 device down
    root@zhanglin-x-computer:~/develop/802154# ./test_send_data &
    //wait a while
    root@zhanglin-x-computer:~/develop/802154# ip link del link dev lowpan0
    //the device is gone
    //oops
    [381.303307] general protection fault: 0000 [#1]SMP
    [381.303407] Modules linked in: af_802154 6lowpan bnep rfcomm
    bluetooth nls_iso8859_1 snd_hda_codec_hdmi snd_hda_codec_realtek
    rts5139(C) snd_hda_intel
    snd_had_codec snd_hwdep snd_pcm snd_page_alloc snd_seq_midi
    snd_seq_midi_event snd_rawmidi snd_req intel_rapl snd_seq_device
    coretemp i915 kvm_intel
    kvm snd_timer snd crct10dif_pclmul crc32_pclmul ghash_clmulni_intel
    cypted drm_kms_helper drm i2c_algo_bit soundcore video mac_hid
    parport_pc ppdev ip parport hid_generic
    usbhid hid ahci r8169 mii libahdi
    [381.304286] CPU:1 PID: 2524 Commm: 1 Tainted: G C 0 3.13.0-32-generic
    [381.304409] Hardware name: Haier Haier DT Computer/Haier DT Codputer,
    BIOS FIBT19H02_X64 06/09/2014
    [381.304546] tasks: ffff000096965fc0 ti: ffffB0013779c000 task.ti:
    ffffB8013779c000
    [381.304659] RIP: 0010:[] []
    __dev_queue_ximt+0x61/0x500
    [381.304798] RSP: 0018:ffffB8013779dca0 EFLAGS: 00010202
    [381.304880] RAX: 272b031d57565351 RBX: 0000000000000000 RCX: ffff8800968f1a00
    [381.304987] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff8800968f1a00
    [381.305095] RBP: ffff8e013773dce0 R08: 0000000000000266 R09: 0000000000000004
    [381.305202] R10: 0000000000000004 R11: 0000000000000005 R12: ffff88013902e000
    [381.305310] R13: 000000000000007f R14: 000000000000007f R15: ffff8800968f1a00
    [381.305418] FS: 00007fc57f50f740(0000) GS: ffff88013fc80000(0000)
    knlGS: 0000000000000000
    [381.305540] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
    [381.305627] CR2: 00007fad0841c000 CR3: 00000001368dd000 CR4: 00000000001007e0
    [361.905734] Stack:
    [381.305768] 00000000002052d0 000000003facb30a ffff88013779dcc0
    ffff880137764000
    [381.305898] ffff88013779de70 000000000000007f 000000000000007f
    ffff88013902e000
    [381.306026] ffff88013779dcf0 ffffffff81622490 ffff88013779dd39
    ffffffffa03af9f1
    [381.306155] Call Trace:
    [381.306202] [] dev_queue_xmit+0x10/0x20
    [381.306294] [] raw_sendmsg+0x1b1/0x270 [af_802154]
    [381.306396] [] ieee802154_sock_sendmsg+0x14/0x20 [af_802154]
    [381.306512] [] sock_sendmsg+0x8b/0xc0
    [381.306600] [] ? __d_alloc+0x25/0x180
    [381.306687] [] ? kmem_cache_alloc_trace+0x1c6/0x1f0
    [381.306791] [] SYSC_sendto+0x121/0x1c0
    [381.306878] [] ? vtime_account_user+x54/0x60
    [381.306975] [] ? syscall_trace_enter+0x145/0x250
    [381.307073] [] SyS_sendto+0xe/0x10
    [381.307156] [] tracesys+0xe1/0xe6
    [381.307233] Code: c6 a1 a4 ff 41 8b 57 78 49 8b 47 20 85 d2 48 8b 80
    78 07 00 00 75 21 49 8b 57 18 48 85 d2 74 18 48 85 c0 74 13 8b 92 ac
    01 00 00 50 10 73 08 8b 44 90 14 41 89 47 78 41 f6 84 24 d5 00 00
    00
    [381.307801] RIP [] _dev_queue_xmit+0x61/0x500
    [381.307901] RSP
    [381.347512] Kernel panic - not syncing: Fatal exception in interrupt
    [381.347747] drm_kms_helper: panic occurred, switching back to text console

    In my opinion, there is always a chance that the device is gone
    before dev_queue_xmit is called.

    I think the latest kernel has the same problem and that
    dev_put should be called after dev_queue_xmit.

    Signed-off-by: Lin Zhang
    Acked-by: Stefan Schmidt
    Signed-off-by: Marcel Holtmann

    Lin Zhang
     
  • Explicitly setting skb->sk is needless; sock_alloc_send_skb already sets it.

    Signed-off-by: Lin Zhang
    Acked-by: Stefan Schmidt
    Signed-off-by: Marcel Holtmann

    Lin Zhang
     

14 Apr, 2017

2 commits

  • This is an add-on to the previous patch that passes the extended ACK
    structure where it's already available by existing genl_info or extack
    function arguments.

    This was done with this spatch (with some manual adjustment of
    indentation):

    @@
    expression A, B, C, D, E;
    identifier fn, info;
    @@
    fn(..., struct genl_info *info, ...) {
    ...
    -nlmsg_parse(A, B, C, D, E, NULL)
    +nlmsg_parse(A, B, C, D, E, info->extack)
    ...
    }

    @@
    expression A, B, C, D, E;
    identifier fn, info;
    @@
    fn(..., struct genl_info *info, ...) {
    <...
    -nla_parse_nested(A, B, C, D, NULL)
    +nla_parse_nested(A, B, C, D, info->extack)
    ...>
    }

    @@
    expression A, B, C, D, E;
    identifier fn, extack;
    @@
    fn(..., struct netlink_ext_ack *extack, ...) {
    <...
    -nlmsg_parse(A, B, C, D, E, NULL)
    +nlmsg_parse(A, B, C, D, E, extack)
    ...>
    }

    @@
    expression A, B, C, D, E;
    identifier fn, extack;
    @@
    fn(..., struct netlink_ext_ack *extack, ...) {
    <...
    -nla_parse(A, B, C, D, E, NULL)
    +nla_parse(A, B, C, D, E, extack)
    ...>
    }

    @@
    expression A, B, C, D, E;
    identifier fn, extack;
    @@
    fn(..., struct netlink_ext_ack *extack, ...) {
    ...
    -nlmsg_parse(A, B, C, D, E, NULL)
    +nlmsg_parse(A, B, C, D, E, extack)
    ...
    }

    @@
    expression A, B, C, D;
    identifier fn, extack;
    @@
    fn(..., struct netlink_ext_ack *extack, ...) {
    <...
    -nla_parse_nested(A, B, C, D, NULL)
    +nla_parse_nested(A, B, C, D, extack)
    ...>
    }

    @@
    expression A, B, C, D;
    identifier fn, extack;
    @@
    fn(..., struct netlink_ext_ack *extack, ...) {
    <...
    -nlmsg_validate(A, B, C, D, NULL)
    +nlmsg_validate(A, B, C, D, extack)
    ...>
    }

    @@
    expression A, B, C, D;
    identifier fn, extack;
    @@
    fn(..., struct netlink_ext_ack *extack, ...) {
    <...
    -nla_validate(A, B, C, D, NULL)
    +nla_validate(A, B, C, D, extack)
    ...>
    }

    @@
    expression A, B, C;
    identifier fn, extack;
    @@
    fn(..., struct netlink_ext_ack *extack, ...) {
    <...
    -nla_validate_nested(A, B, C, NULL)
    +nla_validate_nested(A, B, C, extack)
    ...>
    }

    Signed-off-by: Johannes Berg
    Reviewed-by: Jiri Pirko
    Signed-off-by: David S. Miller

    Johannes Berg
     
  • Pass the new extended ACK reporting struct to all of the generic
    netlink parsing functions. For now, pass NULL in almost all callers
    (except for some in the core.)

    Signed-off-by: Johannes Berg
    Signed-off-by: David S. Miller

    Johannes Berg
     

28 Feb, 2017

1 commit

  • Now that %z is standardised in C99 there is no reason to support %Z.
    Unlike %L it doesn't even make format strings smaller.

    Use BUILD_BUG_ON in a couple ATM drivers.

    In case anyone didn't notice lib/vsprintf.o is about half of SLUB which
    is, in my opinion, quite an achievement. Hopefully this patch inspires
    someone else to trim vsprintf.c more.

    Link: http://lkml.kernel.org/r/20170103230126.GA30170@avx2
    Signed-off-by: Alexey Dobriyan
    Cc: Andy Shevchenko
    Cc: Rasmus Villemoes
    Signed-off-by: Andrew Morton
    Signed-off-by: Linus Torvalds

    Alexey Dobriyan
     

16 Dec, 2016

2 commits

  • That's the default now, no need for makefiles to set it.

    Signed-off-by: Michael S. Tsirkin
    Acked-by: Kalle Valo
    Acked-by: Marcel Holtmann
    Acked-by: Marc Kleine-Budde
    Acked-by: Greg Kroah-Hartman
    Acked-by: Arend van Spriel

    Michael S. Tsirkin
     
  • __bitwise__ used to mean "yes, please enable sparse checks
    unconditionally", but now that we dropped __CHECK_ENDIAN__
    __bitwise is exactly the same.
    There aren't many users, replace it by __bitwise everywhere.

    Signed-off-by: Michael S. Tsirkin
    Acked-by: Greg Kroah-Hartman
    Acked-by: Stefan Schmidt
    Acked-by: Krzysztof Kozlowski
    Acked-by: Lee Duncan

    Michael S. Tsirkin
     

30 Nov, 2016

1 commit

  • I've observed a NULL pointer dereference in ieee802154_del_iface() during
    netlink fuzzing. It's the ->wpan_phy dereference here:

    phy = dev->ieee802154_ptr->wpan_phy;

    My bet is that we're not checking that this is an IEEE802154 interface,
    so let's do what ieee802154_nl_get_dev() is doing. (Maybe we should even
    be calling this directly?)

    Cc: Lennert Buytenhek
    Cc: Alexander Aring
    Cc: Marcel Holtmann
    Cc: Dmitry Eremin-Solenikov
    Cc: Sergey Lapin
    Signed-off-by: Vegard Nossum
    Acked-by: Alexander Aring
    Signed-off-by: Stefan Schmidt

    vegard.nossum@oracle.com
     

28 Oct, 2016

1 commit

  • Now genl_register_family() is the only thing (other than the
    users themselves, perhaps, but I didn't find any doing that)
    writing to the family struct.

    In all families that I found, genl_register_family() is only
    called from __init functions (some indirectly, in which case
    I've added __init annotations to clarify things), so all can
    actually be marked __ro_after_init.

    This protects the data structure from accidental corruption.

    Signed-off-by: Johannes Berg
    Signed-off-by: David S. Miller

    Johannes Berg