25 May, 2016

1 commit


21 May, 2016

2 commits

  • The binary GCD algorithm is based on the following facts:
    1. If a and b are all evens, then gcd(a,b) = 2 * gcd(a/2, b/2)
    2. If a is even and b is odd, then gcd(a,b) = gcd(a/2, b)
    3. If a and b are all odds, then gcd(a,b) = gcd((a-b)/2, b) = gcd((a+b)/2, b)

    Even on x86 machines with reasonable division hardware, the binary
    algorithm runs about 25% faster (80% the execution time) than the
    division-based Euclidian algorithm.

    On platforms like Alpha and ARMv6 where division is a function call to
    emulation code, it's even more significant.

    There are two variants of the code here, depending on whether a fast
    __ffs (find least significant set bit) instruction is available. This
    allows the unpredictable branches in the bit-at-a-time shifting loop to
    be eliminated.

    If fast __ffs is not available, the "even/odd" GCD variant is used.

    I use the following code to benchmark:

    #include
    #include
    #include
    #include
    #include
    #include

    #define swap(a, b) \
    do { \
    a ^= b; \
    b ^= a; \
    a ^= b; \
    } while (0)

    unsigned long gcd0(unsigned long a, unsigned long b)
    {
    unsigned long r;

    if (a < b) {
    swap(a, b);
    }

    if (b == 0)
    return a;

    while ((r = a % b) != 0) {
    a = b;
    b = r;
    }

    return b;
    }

    unsigned long gcd1(unsigned long a, unsigned long b)
    {
    unsigned long r = a | b;

    if (!a || !b)
    return r;

    b >>= __builtin_ctzl(b);

    for (;;) {
    a >>= __builtin_ctzl(a);
    if (a == b)
    return a << __builtin_ctzl(r);

    if (a < b)
    swap(a, b);
    a -= b;
    }
    }

    unsigned long gcd2(unsigned long a, unsigned long b)
    {
    unsigned long r = a | b;

    if (!a || !b)
    return r;

    r &= -r;

    while (!(b & r))
    b >>= 1;

    for (;;) {
    while (!(a & r))
    a >>= 1;
    if (a == b)
    return a;

    if (a < b)
    swap(a, b);
    a -= b;
    a >>= 1;
    if (a & r)
    a += b;
    a >>= 1;
    }
    }

    unsigned long gcd3(unsigned long a, unsigned long b)
    {
    unsigned long r = a | b;

    if (!a || !b)
    return r;

    b >>= __builtin_ctzl(b);
    if (b == 1)
    return r & -r;

    for (;;) {
    a >>= __builtin_ctzl(a);
    if (a == 1)
    return r & -r;
    if (a == b)
    return a << __builtin_ctzl(r);

    if (a < b)
    swap(a, b);
    a -= b;
    }
    }

    unsigned long gcd4(unsigned long a, unsigned long b)
    {
    unsigned long r = a | b;

    if (!a || !b)
    return r;

    r &= -r;

    while (!(b & r))
    b >>= 1;
    if (b == r)
    return r;

    for (;;) {
    while (!(a & r))
    a >>= 1;
    if (a == r)
    return r;
    if (a == b)
    return a;

    if (a < b)
    swap(a, b);
    a -= b;
    a >>= 1;
    if (a & r)
    a += b;
    a >>= 1;
    }
    }

    static unsigned long (*gcd_func[])(unsigned long a, unsigned long b) = {
    gcd0, gcd1, gcd2, gcd3, gcd4,
    };

    #define TEST_ENTRIES (sizeof(gcd_func) / sizeof(gcd_func[0]))

    #if defined(__x86_64__)

    #define rdtscll(val) do { \
    unsigned long __a,__d; \
    __asm__ __volatile__("rdtsc" : "=a" (__a), "=d" (__d)); \
    (val) = ((unsigned long long)__a) | (((unsigned long long)__d)<= start)
    ret = end - start;
    else
    ret = ~0ULL - start + 1 + end;

    *res = gcd_res;
    return ret;
    }

    #else

    static inline struct timespec read_time(void)
    {
    struct timespec time;
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &time);
    return time;
    }

    static inline unsigned long long diff_time(struct timespec start, struct timespec end)
    {
    struct timespec temp;

    if ((end.tv_nsec - start.tv_nsec) < 0) {
    temp.tv_sec = end.tv_sec - start.tv_sec - 1;
    temp.tv_nsec = 1000000000ULL + end.tv_nsec - start.tv_nsec;
    } else {
    temp.tv_sec = end.tv_sec - start.tv_sec;
    temp.tv_nsec = end.tv_nsec - start.tv_nsec;
    }

    return temp.tv_sec * 1000000000ULL + temp.tv_nsec;
    }

    static unsigned long long benchmark_gcd_func(unsigned long (*gcd)(unsigned long, unsigned long),
    unsigned long a, unsigned long b, unsigned long *res)
    {
    struct timespec start, end;
    unsigned long gcd_res;

    start = read_time();
    gcd_res = gcd(a, b);
    end = read_time();

    *res = gcd_res;
    return diff_time(start, end);
    }

    #endif

    static inline unsigned long get_rand()
    {
    if (sizeof(long) == 8)
    return (unsigned long)rand() << 32 | rand();
    else
    return rand();
    }

    int main(int argc, char **argv)
    {
    unsigned int seed = time(0);
    int loops = 100;
    int repeats = 1000;
    unsigned long (*res)[TEST_ENTRIES];
    unsigned long long elapsed[TEST_ENTRIES];
    int i, j, k;

    for (;;) {
    int opt = getopt(argc, argv, "n:r:s:");
    /* End condition always first */
    if (opt == -1)
    break;

    switch (opt) {
    case 'n':
    loops = atoi(optarg);
    break;
    case 'r':
    repeats = atoi(optarg);
    break;
    case 's':
    seed = strtoul(optarg, NULL, 10);
    break;
    default:
    /* You won't actually get here. */
    break;
    }
    }

    res = malloc(sizeof(unsigned long) * TEST_ENTRIES * loops);
    memset(elapsed, 0, sizeof(elapsed));

    srand(seed);
    for (j = 0; j < loops; j++) {
    unsigned long a = get_rand();
    /* Do we have args? */
    unsigned long b = argc > optind ? strtoul(argv[optind], NULL, 10) : get_rand();
    unsigned long long min_elapsed[TEST_ENTRIES];
    for (k = 0; k < repeats; k++) {
    for (i = 0; i < TEST_ENTRIES; i++) {
    unsigned long long tmp = benchmark_gcd_func(gcd_func[i], a, b, &res[j][i]);
    if (k == 0 || min_elapsed[i] > tmp)
    min_elapsed[i] = tmp;
    }
    }
    for (i = 0; i < TEST_ENTRIES; i++)
    elapsed[i] += min_elapsed[i];
    }

    for (i = 0; i < TEST_ENTRIES; i++)
    printf("gcd%d: elapsed %llu\n", i, elapsed[i]);

    k = 0;
    srand(seed);
    for (j = 0; j < loops; j++) {
    unsigned long a = get_rand();
    unsigned long b = argc > optind ? strtoul(argv[optind], NULL, 10) : get_rand();
    for (i = 1; i < TEST_ENTRIES; i++) {
    if (res[j][i] != res[j][0])
    break;
    }
    if (i < TEST_ENTRIES) {
    if (k == 0) {
    k = 1;
    fprintf(stderr, "Error:\n");
    }
    fprintf(stderr, "gcd(%lu, %lu): ", a, b);
    for (i = 0; i < TEST_ENTRIES; i++)
    fprintf(stderr, "%ld%s", res[j][i], i < TEST_ENTRIES - 1 ? ", " : "\n");
    }
    }

    if (k == 0)
    fprintf(stderr, "PASS\n");

    free(res);

    return 0;
    }

    Compiled with "-O2", on "VirtualBox 4.4.0-22-generic #38-Ubuntu x86_64" got:

    zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10
    gcd0: elapsed 10174
    gcd1: elapsed 2120
    gcd2: elapsed 2902
    gcd3: elapsed 2039
    gcd4: elapsed 2812
    PASS
    zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10
    gcd0: elapsed 9309
    gcd1: elapsed 2280
    gcd2: elapsed 2822
    gcd3: elapsed 2217
    gcd4: elapsed 2710
    PASS
    zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10
    gcd0: elapsed 9589
    gcd1: elapsed 2098
    gcd2: elapsed 2815
    gcd3: elapsed 2030
    gcd4: elapsed 2718
    PASS
    zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10
    gcd0: elapsed 9914
    gcd1: elapsed 2309
    gcd2: elapsed 2779
    gcd3: elapsed 2228
    gcd4: elapsed 2709
    PASS

    [akpm@linux-foundation.org: avoid #defining a CONFIG_ variable]
    Signed-off-by: Zhaoxiu Zeng
    Signed-off-by: George Spelvin
    Signed-off-by: Andrew Morton
    Signed-off-by: Linus Torvalds

    Zhaoxiu Zeng
     
  • Define HAVE_EXIT_THREAD for archs which want to do something in
    exit_thread. For others, let's define exit_thread as an empty inline.

    This is a cleanup before we change the prototype of exit_thread to
    accept a task parameter.

    [akpm@linux-foundation.org: fix mips]
    Signed-off-by: Jiri Slaby
    Cc: "David S. Miller"
    Cc: "H. Peter Anvin"
    Cc: "James E.J. Bottomley"
    Cc: Aurelien Jacquiot
    Cc: Benjamin Herrenschmidt
    Cc: Catalin Marinas
    Cc: Chen Liqin
    Cc: Chris Metcalf
    Cc: Chris Zankel
    Cc: David Howells
    Cc: Fenghua Yu
    Cc: Geert Uytterhoeven
    Cc: Guan Xuetao
    Cc: Haavard Skinnemoen
    Cc: Hans-Christian Egtvedt
    Cc: Heiko Carstens
    Cc: Helge Deller
    Cc: Ingo Molnar
    Cc: Ivan Kokshaysky
    Cc: James Hogan
    Cc: Jeff Dike
    Cc: Jesper Nilsson
    Cc: Jiri Slaby
    Cc: Jonas Bonn
    Cc: Koichi Yasutake
    Cc: Lennox Wu
    Cc: Ley Foon Tan
    Cc: Mark Salter
    Cc: Martin Schwidefsky
    Cc: Matt Turner
    Cc: Max Filippov
    Cc: Michael Ellerman
    Cc: Michal Simek
    Cc: Mikael Starvik
    Cc: Paul Mackerras
    Cc: Peter Zijlstra
    Cc: Ralf Baechle
    Cc: Rich Felker
    Cc: Richard Henderson
    Cc: Richard Kuo
    Cc: Richard Weinberger
    Cc: Russell King
    Cc: Steven Miao
    Cc: Thomas Gleixner
    Cc: Tony Luck
    Cc: Vineet Gupta
    Cc: Will Deacon
    Cc: Yoshinori Sato
    Signed-off-by: Andrew Morton
    Signed-off-by: Linus Torvalds

    Jiri Slaby
     

05 May, 2016

1 commit

  • The newer renameat2 syscall provides all the functionality provided by
    the renameat syscall and adds flags, so future architectures won't need
    to include renameat.

    Therefore drop the renameat syscall from the generic syscall list unless
    __ARCH_WANT_RENAMEAT is defined by the architecture's unistd.h prior to
    including asm-generic/unistd.h, and adjust all architectures using the
    generic syscall list to define it so that no in-tree architectures are
    affected.

    Signed-off-by: James Hogan
    Acked-by: Vineet Gupta
    Cc: linux-arch@vger.kernel.org
    Cc: linux-snps-arc@lists.infradead.org
    Cc: Catalin Marinas
    Cc: Will Deacon
    Cc: linux-arm-kernel@lists.infradead.org
    Cc: Mark Salter
    Cc: Aurelien Jacquiot
    Cc: linux-c6x-dev@linux-c6x.org
    Cc: Richard Kuo
    Cc: linux-hexagon@vger.kernel.org
    Cc: linux-metag@vger.kernel.org
    Cc: Jonas Bonn
    Cc: linux@lists.openrisc.net
    Cc: Chen Liqin
    Cc: Lennox Wu
    Cc: Chris Metcalf
    Cc: Guan Xuetao
    Cc: Ley Foon Tan
    Cc: nios2-dev@lists.rocketboards.org
    Cc: Yoshinori Sato
    Cc: uclinux-h8-devel@lists.sourceforge.jp
    Signed-off-by: Arnd Bergmann

    James Hogan
     

20 Mar, 2016

1 commit

  • Pull networking updates from David Miller:
    "Highlights:

    1) Support more Realtek wireless chips, from Jes Sorenson.

    2) New BPF types for per-cpu hash and arrap maps, from Alexei
    Starovoitov.

    3) Make several TCP sysctls per-namespace, from Nikolay Borisov.

    4) Allow the use of SO_REUSEPORT in order to do per-thread processing
    of incoming TCP/UDP connections. The muxing can be done using a
    BPF program which hashes the incoming packet. From Craig Gallek.

    5) Add a multiplexer for TCP streams, to provide a messaged based
    interface. BPF programs can be used to determine the message
    boundaries. From Tom Herbert.

    6) Add 802.1AE MACSEC support, from Sabrina Dubroca.

    7) Avoid factorial complexity when taking down an inetdev interface
    with lots of configured addresses. We were doing things like
    traversing the entire address less for each address removed, and
    flushing the entire netfilter conntrack table for every address as
    well.

    8) Add and use SKB bulk free infrastructure, from Jesper Brouer.

    9) Allow offloading u32 classifiers to hardware, and implement for
    ixgbe, from John Fastabend.

    10) Allow configuring IRQ coalescing parameters on a per-queue basis,
    from Kan Liang.

    11) Extend ethtool so that larger link mode masks can be supported.
    From David Decotigny.

    12) Introduce devlink, which can be used to configure port link types
    (ethernet vs Infiniband, etc.), port splitting, and switch device
    level attributes as a whole. From Jiri Pirko.

    13) Hardware offload support for flower classifiers, from Amir Vadai.

    14) Add "Local Checksum Offload". Basically, for a tunneled packet
    the checksum of the outer header is 'constant' (because with the
    checksum field filled into the inner protocol header, the payload
    of the outer frame checksums to 'zero'), and we can take advantage
    of that in various ways. From Edward Cree"

    * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1548 commits)
    bonding: fix bond_get_stats()
    net: bcmgenet: fix dma api length mismatch
    net/mlx4_core: Fix backward compatibility on VFs
    phy: mdio-thunder: Fix some Kconfig typos
    lan78xx: add ndo_get_stats64
    lan78xx: handle statistics counter rollover
    RDS: TCP: Remove unused constant
    RDS: TCP: Add sysctl tunables for sndbuf/rcvbuf on rds-tcp socket
    net: smc911x: convert pxa dma to dmaengine
    team: remove duplicate set of flag IFF_MULTICAST
    bonding: remove duplicate set of flag IFF_MULTICAST
    net: fix a comment typo
    ethernet: micrel: fix some error codes
    ip_tunnels, bpf: define IP_TUNNEL_OPTS_MAX and use it
    bpf, dst: add and use dst_tclassid helper
    bpf: make skb->tc_classid also readable
    net: mvneta: bm: clarify dependencies
    cls_bpf: reset class and reuse major in da
    ldmvsw: Checkpatch sunvnet.c and sunvnet_common.c
    ldmvsw: Add ldmvsw.c driver code
    ...

    Linus Torvalds
     

18 Mar, 2016

1 commit

  • Pull security layer updates from James Morris:
    "There are a bunch of fixes to the TPM, IMA, and Keys code, with minor
    fixes scattered across the subsystem.

    IMA now requires signed policy, and that policy is also now measured
    and appraised"

    * 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/jmorris/linux-security: (67 commits)
    X.509: Make algo identifiers text instead of enum
    akcipher: Move the RSA DER encoding check to the crypto layer
    crypto: Add hash param to pkcs1pad
    sign-file: fix build with CMS support disabled
    MAINTAINERS: update tpmdd urls
    MODSIGN: linux/string.h should be #included to get memcpy()
    certs: Fix misaligned data in extra certificate list
    X.509: Handle midnight alternative notation in GeneralizedTime
    X.509: Support leap seconds
    Handle ISO 8601 leap seconds and encodings of midnight in mktime64()
    X.509: Fix leap year handling again
    PKCS#7: fix unitialized boolean 'want'
    firmware: change kernel read fail to dev_dbg()
    KEYS: Use the symbol value for list size, updated by scripts/insert-sys-cert
    KEYS: Reserve an extra certificate symbol for inserting without recompiling
    modsign: hide openssl output in silent builds
    tpm_tis: fix build warning with tpm_tis_resume
    ima: require signed IMA policy
    ima: measure and appraise the IMA policy itself
    ima: load policy using path
    ...

    Linus Torvalds
     

14 Mar, 2016

2 commits

  • This patch updates csum_ipv6_magic so that it correctly recognizes that
    protocol is a unsigned 8 bit value.

    This will allow us to better understand what limitations may or may not be
    present in how we handle the data. For example there are a number of
    places that call htonl on the protocol value. This is likely not necessary
    and can be replaced with a multiplication by ntohl(1) which will be
    converted to a shift by the compiler.

    Signed-off-by: Alexander Duyck
    Signed-off-by: David S. Miller

    Alexander Duyck
     
  • This patch updates all instances of csum_tcpudp_magic and
    csum_tcpudp_nofold to reflect the types that are usually used as the source
    inputs. For example the protocol field is populated based on nexthdr which
    is actually an unsigned 8 bit value. The length is usually populated based
    on skb->len which is an unsigned integer.

    This addresses an issue in which the IPv6 function csum_ipv6_magic was
    generating a checksum using the full 32b of skb->len while
    csum_tcpudp_magic was only using the lower 16 bits. As a result we could
    run into issues when attempting to adjust the checksum as there was no
    protocol agnostic way to update it.

    With this change the value is still truncated as many architectures use
    "(len + proto) << 8", however this truncation only occurs for values
    greater than 16776960 in length and as such is unlikely to occur as we stop
    the inner headers at ~64K in size.

    I did have to make a few minor changes in the arm, mn10300, nios2, and
    score versions of the function in order to support these changes as they
    were either using things such as an OR to combine the protocol and length,
    or were using ntohs to convert the length which would have truncated the
    value.

    I also updated a few spots in terms of whitespace and type differences for
    the addresses. Most of this was just to make sure all of the definitions
    were in sync going forward.

    Signed-off-by: Alexander Duyck
    Signed-off-by: David S. Miller

    Alexander Duyck
     

10 Feb, 2016

1 commit

  • CONFIG_KEYS_DEBUG_PROC_KEYS is no longer an option as /proc/keys is now
    mandatory if the keyrings facility is enabled (it's used by libkeyutils in
    userspace).

    The defconfig references were removed with:

    perl -p -i -e 's/CONFIG_KEYS_DEBUG_PROC_KEYS=y\n//' \
    `git grep -l CONFIG_KEYS_DEBUG_PROC_KEYS=y`

    and the integrity Kconfig fixed by hand.

    Signed-off-by: David Howells
    cc: Andreas Ziegler
    cc: Dmitry Kasatkin

    David Howells
     

30 Jan, 2016

1 commit

  • Set IORESOURCE_SYSTEM_RAM in flags of resource ranges with
    "System RAM", "Kernel code", "Kernel data", and "Kernel bss".

    Note that:

    - IORESOURCE_SYSRAM (i.e. modifier bit) is set in flags when
    IORESOURCE_MEM is already set. IORESOURCE_SYSTEM_RAM is defined
    as (IORESOURCE_MEM|IORESOURCE_SYSRAM).

    - Some archs do not set 'flags' for children nodes, such as
    "Kernel code". This patch does not change 'flags' in this
    case.

    Signed-off-by: Toshi Kani
    Signed-off-by: Borislav Petkov
    Cc: Andrew Morton
    Cc: Andy Lutomirski
    Cc: Borislav Petkov
    Cc: Brian Gerst
    Cc: Denys Vlasenko
    Cc: H. Peter Anvin
    Cc: Linus Torvalds
    Cc: Luis R. Rodriguez
    Cc: Peter Zijlstra
    Cc: Thomas Gleixner
    Cc: Toshi Kani
    Cc: linux-arch@vger.kernel.org
    Cc: linux-arm-kernel@lists.infradead.org
    Cc: linux-mips@linux-mips.org
    Cc: linux-mm
    Cc: linux-parisc@vger.kernel.org
    Cc: linux-s390@vger.kernel.org
    Cc: linux-sh@vger.kernel.org
    Cc: linuxppc-dev@lists.ozlabs.org
    Cc: sparclinux@vger.kernel.org
    Link: http://lkml.kernel.org/r/1453841853-11383-7-git-send-email-bp@alien8.de
    Signed-off-by: Ingo Molnar

    Toshi Kani
     

04 Oct, 2015

1 commit

  • Pull strscpy string copy function implementation from Chris Metcalf.

    Chris sent this during the merge window, but I waffled back and forth on
    the pull request, which is why it's going in only now.

    The new "strscpy()" function is definitely easier to use and more secure
    than either strncpy() or strlcpy(), both of which are horrible nasty
    interfaces that have serious and irredeemable problems.

    strncpy() has a useless return value, and doesn't NUL-terminate an
    overlong result. To make matters worse, it pads a short result with
    zeroes, which is a performance disaster if you have big buffers.

    strlcpy(), by contrast, is a mis-designed "fix" for strlcpy(), lacking
    the insane NUL padding, but having a differently broken return value
    which returns the original length of the source string. Which means
    that it will read characters past the count from the source buffer, and
    you have to trust the source to be properly terminated. It also makes
    error handling fragile, since the test for overflow is unnecessarily
    subtle.

    strscpy() avoids both these problems, guaranteeing the NUL termination
    (but not excessive padding) if the destination size wasn't zero, and
    making the overflow condition very obvious by returning -E2BIG. It also
    doesn't read past the size of the source, and can thus be used for
    untrusted source data too.

    So why did I waffle about this for so long?

    Every time we introduce a new-and-improved interface, people start doing
    these interminable series of trivial conversion patches.

    And every time that happens, somebody does some silly mistake, and the
    conversion patch to the improved interface actually makes things worse.
    Because the patch is mindnumbing and trivial, nobody has the attention
    span to look at it carefully, and it's usually done over large swatches
    of source code which means that not every conversion gets tested.

    So I'm pulling the strscpy() support because it *is* a better interface.
    But I will refuse to pull mindless conversion patches. Use this in
    places where it makes sense, but don't do trivial patches to fix things
    that aren't actually known to be broken.

    * 'strscpy' of git://git.kernel.org/pub/scm/linux/kernel/git/cmetcalf/linux-tile:
    tile: use global strscpy() rather than private copy
    string: provide strscpy()
    Make asm/word-at-a-time.h available on all architectures

    Linus Torvalds
     

02 Sep, 2015

1 commit

  • Pull timer updates from Thomas Gleixner:
    "Rather large, but nothing exiting:

    - new range check for settimeofday() to prevent that boot time
    becomes negative.
    - fix for file time rounding
    - a few simplifications of the hrtimer code
    - fix for the proc/timerlist code so the output of clock realtime
    timers is accurate
    - more y2038 work
    - tree wide conversion of clockevent drivers to the new callbacks"

    * 'timers-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (88 commits)
    hrtimer: Handle failure of tick_init_highres() gracefully
    hrtimer: Unconfuse switch_hrtimer_base() a bit
    hrtimer: Simplify get_target_base() by returning current base
    hrtimer: Drop return code of hrtimer_switch_to_hres()
    time: Introduce timespec64_to_jiffies()/jiffies_to_timespec64()
    time: Introduce current_kernel_time64()
    time: Introduce struct itimerspec64
    time: Add the common weak version of update_persistent_clock()
    time: Always make sure wall_to_monotonic isn't positive
    time: Fix nanosecond file time rounding in timespec_trunc()
    timer_list: Add the base offset so remaining nsecs are accurate for non monotonic timers
    cris/time: Migrate to new 'set-state' interface
    kernel: broadcast-hrtimer: Migrate to new 'set-state' interface
    xtensa/time: Migrate to new 'set-state' interface
    unicore/time: Migrate to new 'set-state' interface
    um/time: Migrate to new 'set-state' interface
    sparc/time: Migrate to new 'set-state' interface
    sh/localtimer: Migrate to new 'set-state' interface
    score/time: Migrate to new 'set-state' interface
    s390/time: Migrate to new 'set-state' interface
    ...

    Linus Torvalds
     

01 Sep, 2015

1 commit

  • Pull scheduler updates from Ingo Molnar:
    "The biggest change in this cycle is the rewrite of the main SMP load
    balancing metric: the CPU load/utilization. The main goal was to make
    the metric more precise and more representative - see the changelog of
    this commit for the gory details:

    9d89c257dfb9 ("sched/fair: Rewrite runnable load and utilization average tracking")

    It is done in a way that significantly reduces complexity of the code:

    5 files changed, 249 insertions(+), 494 deletions(-)

    and the performance testing results are encouraging. Nevertheless we
    need to keep an eye on potential regressions, since this potentially
    affects every SMP workload in existence.

    This work comes from Yuyang Du.

    Other changes:

    - SCHED_DL updates. (Andrea Parri)

    - Simplify architecture callbacks by removing finish_arch_switch().
    (Peter Zijlstra et al)

    - cputime accounting: guarantee stime + utime == rtime. (Peter
    Zijlstra)

    - optimize idle CPU wakeups some more - inspired by Facebook server
    loads. (Mike Galbraith)

    - stop_machine fixes and updates. (Oleg Nesterov)

    - Introduce the 'trace_sched_waking' tracepoint. (Peter Zijlstra)

    - sched/numa tweaks. (Srikar Dronamraju)

    - misc fixes and small cleanups"

    * 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (44 commits)
    sched/deadline: Fix comment in enqueue_task_dl()
    sched/deadline: Fix comment in push_dl_tasks()
    sched: Change the sched_class::set_cpus_allowed() calling context
    sched: Make sched_class::set_cpus_allowed() unconditional
    sched: Fix a race between __kthread_bind() and sched_setaffinity()
    sched: Ensure a task has a non-normalized vruntime when returning back to CFS
    sched/numa: Fix NUMA_DIRECT topology identification
    tile: Reorganize _switch_to()
    sched, sparc32: Update scheduler comments in copy_thread()
    sched: Remove finish_arch_switch()
    sched, tile: Remove finish_arch_switch
    sched, sh: Fold finish_arch_switch() into switch_to()
    sched, score: Remove finish_arch_switch()
    sched, avr32: Remove finish_arch_switch()
    sched, MIPS: Get rid of finish_arch_switch()
    sched, arm: Remove finish_arch_switch()
    sched/fair: Clean up load average references
    sched/fair: Provide runnable_load_avg back to cfs_rq
    sched/fair: Remove task and group entity load when they are dead
    sched/fair: Init cfs_rq's sched_entity load average
    ...

    Linus Torvalds
     

10 Aug, 2015

1 commit

  • Migrate score driver to the new 'set-state' interface provided by
    clockevents core, the earlier 'set-mode' interface is marked obsolete
    now.

    This also enables us to implement callbacks for new states of clockevent
    devices, for example: ONESHOT_STOPPED.

    We weren't doing anything in ONESHOT/SHUTDOWN/RESUME modes and so
    callbacks for them aren't implemented.

    Cc: Chen Liqin
    Cc: Lennox Wu
    Cc: Michael Opdenacker
    Signed-off-by: Viresh Kumar
    Signed-off-by: Daniel Lezcano

    Viresh Kumar
     

04 Aug, 2015

1 commit


18 Jul, 2015

1 commit

  • Commit 2ae416b142b6 ("mm: new mm hook framework") introduced an empty
    header file (mm-arch-hooks.h) for every architecture, even those which
    doesn't need to define mm hooks.

    As suggested by Geert Uytterhoeven, this could be cleaned through the use
    of a generic header file included via each per architecture
    asm/include/Kbuild file.

    The PowerPC architecture is not impacted here since this architecture has
    to defined the arch_remap MM hook.

    Signed-off-by: Laurent Dufour
    Suggested-by: Geert Uytterhoeven
    Acked-by: Geert Uytterhoeven
    Acked-by: Vineet Gupta
    Cc: Oleg Nesterov
    Signed-off-by: Andrew Morton
    Signed-off-by: Linus Torvalds

    Laurent Dufour
     

09 Jul, 2015

1 commit

  • Added the x86 implementation of word-at-a-time to the
    generic version, which previously only supported big-endian.

    Omitted the x86-specific load_unaligned_zeropad(), which in
    any case is also not present for the existing BE-only
    implementation of a word-at-a-time, and is only used under
    CONFIG_DCACHE_WORD_ACCESS.

    Added as a "generic-y" to the Kbuilds of all architectures
    that didn't previously have it.

    Signed-off-by: Chris Metcalf

    Chris Metcalf
     

26 Jun, 2015

1 commit


25 Jun, 2015

1 commit

  • CRIU is recreating the process memory layout by remapping the checkpointee
    memory area on top of the current process (criu). This includes remapping
    the vDSO to the place it has at checkpoint time.

    However some architectures like powerpc are keeping a reference to the
    vDSO base address to build the signal return stack frame by calling the
    vDSO sigreturn service. So once the vDSO has been moved, this reference
    is no more valid and the signal frame built later are not usable.

    This patch serie is introducing a new mm hook framework, and a new
    arch_remap hook which is called when mremap is done and the mm lock still
    hold. The next patch is adding the vDSO remap and unmap tracking to the
    powerpc architecture.

    This patch (of 3):

    This patch introduces a new set of header file to manage mm hooks:
    - per architecture empty header file (arch/x/include/asm/mm-arch-hooks.h)
    - a generic header (include/linux/mm-arch-hooks.h)

    The architecture which need to overwrite a hook as to redefine it in its
    header file, while architecture which doesn't need have nothing to do.

    The default hooks are defined in the generic header and are used in the
    case the architecture is not defining it.

    In a next step, mm hooks defined in include/asm-generic/mm_hooks.h should
    be moved here.

    Signed-off-by: Laurent Dufour
    Suggested-by: Andrew Morton
    Cc: "Kirill A. Shutemov"
    Cc: Hugh Dickins
    Cc: Rik van Riel
    Cc: Mel Gorman
    Cc: Pavel Emelyanov
    Cc: Benjamin Herrenschmidt
    Cc: Paul Mackerras
    Cc: Michael Ellerman
    Cc: Ingo Molnar
    Signed-off-by: Andrew Morton
    Signed-off-by: Linus Torvalds

    Laurent Dufour
     

23 Jun, 2015

2 commits

  • Pull scheduler updates from Ingo Molnar:
    "The main changes are:

    - lockless wakeup support for futexes and IPC message queues
    (Davidlohr Bueso, Peter Zijlstra)

    - Replace spinlocks with atomics in thread_group_cputimer(), to
    improve scalability (Jason Low)

    - NUMA balancing improvements (Rik van Riel)

    - SCHED_DEADLINE improvements (Wanpeng Li)

    - clean up and reorganize preemption helpers (Frederic Weisbecker)

    - decouple page fault disabling machinery from the preemption
    counter, to improve debuggability and robustness (David
    Hildenbrand)

    - SCHED_DEADLINE documentation updates (Luca Abeni)

    - topology CPU masks cleanups (Bartosz Golaszewski)

    - /proc/sched_debug improvements (Srikar Dronamraju)"

    * 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (79 commits)
    sched/deadline: Remove needless parameter in dl_runtime_exceeded()
    sched: Remove superfluous resetting of the p->dl_throttled flag
    sched/deadline: Drop duplicate init_sched_dl_class() declaration
    sched/deadline: Reduce rq lock contention by eliminating locking of non-feasible target
    sched/deadline: Make init_sched_dl_class() __init
    sched/deadline: Optimize pull_dl_task()
    sched/preempt: Add static_key() to preempt_notifiers
    sched/preempt: Fix preempt notifiers documentation about hlist_del() within unsafe iteration
    sched/stop_machine: Fix deadlock between multiple stop_two_cpus()
    sched/debug: Add sum_sleep_runtime to /proc//sched
    sched/debug: Replace vruntime with wait_sum in /proc/sched_debug
    sched/debug: Properly format runnable tasks in /proc/sched_debug
    sched/numa: Only consider less busy nodes as numa balancing destinations
    Revert 095bebf61a46 ("sched/numa: Do not move past the balance point if unbalanced")
    sched/fair: Prevent throttling in early pick_next_task_fair()
    preempt: Reorganize the notrace definitions a bit
    preempt: Use preempt_schedule_context() as the official tracing preemption point
    sched: Make preempt_schedule_context() function-tracing safe
    x86: Remove cpu_sibling_mask() and cpu_core_mask()
    x86: Replace cpu_**_mask() with topology_**_cpumask()
    ...

    Linus Torvalds
     
  • Pull locking updates from Ingo Molnar:
    "The main changes are:

    - 'qspinlock' support, enabled on x86: queued spinlocks - these are
    now the spinlock variant used by x86 as they outperform ticket
    spinlocks in every category. (Waiman Long)

    - 'pvqspinlock' support on x86: paravirtualized variant of queued
    spinlocks. (Waiman Long, Peter Zijlstra)

    - 'qrwlock' support, enabled on x86: queued rwlocks. Similar to
    queued spinlocks, they are now the variant used by x86:

    CONFIG_ARCH_USE_QUEUED_SPINLOCKS=y
    CONFIG_QUEUED_SPINLOCKS=y
    CONFIG_ARCH_USE_QUEUED_RWLOCKS=y
    CONFIG_QUEUED_RWLOCKS=y

    - various lockdep fixlets

    - various locking primitives cleanups, further WRITE_ONCE()
    propagation"

    * 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (24 commits)
    locking/lockdep: Remove hard coded array size dependency
    locking/qrwlock: Don't contend with readers when setting _QW_WAITING
    lockdep: Do not break user-visible string
    locking/arch: Rename set_mb() to smp_store_mb()
    locking/arch: Add WRITE_ONCE() to set_mb()
    rtmutex: Warn if trylock is called from hard/softirq context
    arch: Remove __ARCH_HAVE_CMPXCHG
    locking/rtmutex: Drop usage of __HAVE_ARCH_CMPXCHG
    locking/qrwlock: Rename QUEUE_RWLOCK to QUEUED_RWLOCKS
    locking/pvqspinlock: Rename QUEUED_SPINLOCK to QUEUED_SPINLOCKS
    locking/pvqspinlock: Replace xchg() by the more descriptive set_mb()
    locking/pvqspinlock, x86: Enable PV qspinlock for Xen
    locking/pvqspinlock, x86: Enable PV qspinlock for KVM
    locking/pvqspinlock, x86: Implement the paravirt qspinlock call patching
    locking/pvqspinlock: Implement simple paravirt support for the qspinlock
    locking/qspinlock: Revert to test-and-set on hypervisors
    locking/qspinlock: Use a simple write to grab the lock
    locking/qspinlock: Optimize for smaller NR_CPUS
    locking/qspinlock: Extract out code snippets for the next patch
    locking/qspinlock: Add pending bit
    ...

    Linus Torvalds
     

11 Jun, 2015

1 commit

  • The latest version of modinfo fails to compile score architecture
    targets with the following error.

    FATAL: The relocation at __ex_table+0x634 references
    section "__ex_table" which is not executable, IOW
    the kernel will fault if it ever tries to
    jump to it. Something is seriously wrong
    and should be fixed.

    The probem is caused by a bad label in an __ex_table entry.

    Acked-by: Lennox Wu
    Cc: Quentin Casasnovas
    Signed-off-by: Guenter Roeck

    Guenter Roeck
     

19 May, 2015

3 commits

  • Signed-off-by: Christoph Hellwig
    Reported-by: Geert Uytterhoeven
    Signed-off-by: Jens Axboe

    Christoph Hellwig
     
  • Introduce faulthandler_disabled() and use it to check for irq context and
    disabled pagefaults (via pagefault_disable()) in the pagefault handlers.

    Please note that we keep the in_atomic() checks in place - to detect
    whether in irq context (in which case preemption is always properly
    disabled).

    In contrast, preempt_disable() should never be used to disable pagefaults.
    With !CONFIG_PREEMPT_COUNT, preempt_disable() doesn't modify the preempt
    counter, and therefore the result of in_atomic() differs.
    We validate that condition by using might_fault() checks when calling
    might_sleep().

    Therefore, add a comment to faulthandler_disabled(), describing why this
    is needed.

    faulthandler_disabled() and pagefault_disable() are defined in
    linux/uaccess.h, so let's properly add that include to all relevant files.

    This patch is based on a patch from Thomas Gleixner.

    Reviewed-and-tested-by: Thomas Gleixner
    Signed-off-by: David Hildenbrand
    Signed-off-by: Peter Zijlstra (Intel)
    Cc: David.Laight@ACULAB.COM
    Cc: Linus Torvalds
    Cc: Peter Zijlstra
    Cc: Thomas Gleixner
    Cc: airlied@linux.ie
    Cc: akpm@linux-foundation.org
    Cc: benh@kernel.crashing.org
    Cc: bigeasy@linutronix.de
    Cc: borntraeger@de.ibm.com
    Cc: daniel.vetter@intel.com
    Cc: heiko.carstens@de.ibm.com
    Cc: herbert@gondor.apana.org.au
    Cc: hocko@suse.cz
    Cc: hughd@google.com
    Cc: mst@redhat.com
    Cc: paulus@samba.org
    Cc: ralf@linux-mips.org
    Cc: schwidefsky@de.ibm.com
    Cc: yang.shi@windriver.com
    Link: http://lkml.kernel.org/r/1431359540-32227-7-git-send-email-dahi@linux.vnet.ibm.com
    Signed-off-by: Ingo Molnar

    David Hildenbrand
     
  • In general, non-atomic variants of user access functions must not sleep
    if pagefaults are disabled.

    Let's update all relevant comments in uaccess code. This also reflects
    the might_sleep() checks in might_fault().

    Reviewed-and-tested-by: Thomas Gleixner
    Signed-off-by: David Hildenbrand
    Signed-off-by: Peter Zijlstra (Intel)
    Cc: David.Laight@ACULAB.COM
    Cc: Linus Torvalds
    Cc: Peter Zijlstra
    Cc: Thomas Gleixner
    Cc: airlied@linux.ie
    Cc: akpm@linux-foundation.org
    Cc: benh@kernel.crashing.org
    Cc: bigeasy@linutronix.de
    Cc: borntraeger@de.ibm.com
    Cc: daniel.vetter@intel.com
    Cc: heiko.carstens@de.ibm.com
    Cc: herbert@gondor.apana.org.au
    Cc: hocko@suse.cz
    Cc: hughd@google.com
    Cc: mst@redhat.com
    Cc: paulus@samba.org
    Cc: ralf@linux-mips.org
    Cc: schwidefsky@de.ibm.com
    Cc: yang.shi@windriver.com
    Link: http://lkml.kernel.org/r/1431359540-32227-4-git-send-email-dahi@linux.vnet.ibm.com
    Signed-off-by: Ingo Molnar

    David Hildenbrand
     

13 May, 2015

1 commit


13 Apr, 2015

1 commit


13 Feb, 2015

1 commit

  • If an attacker can cause a controlled kernel stack overflow, overwriting
    the restart block is a very juicy exploit target. This is because the
    restart_block is held in the same memory allocation as the kernel stack.

    Moving the restart block to struct task_struct prevents this exploit by
    making the restart_block harder to locate.

    Note that there are other fields in thread_info that are also easy
    targets, at least on some architectures.

    It's also a decent simplification, since the restart code is more or less
    identical on all architectures.

    [james.hogan@imgtec.com: metag: align thread_info::supervisor_stack]
    Signed-off-by: Andy Lutomirski
    Cc: Thomas Gleixner
    Cc: Al Viro
    Cc: "H. Peter Anvin"
    Cc: Ingo Molnar
    Cc: Kees Cook
    Cc: David Miller
    Acked-by: Richard Weinberger
    Cc: Richard Henderson
    Cc: Ivan Kokshaysky
    Cc: Matt Turner
    Cc: Vineet Gupta
    Cc: Russell King
    Cc: Catalin Marinas
    Cc: Will Deacon
    Cc: Haavard Skinnemoen
    Cc: Hans-Christian Egtvedt
    Cc: Steven Miao
    Cc: Mark Salter
    Cc: Aurelien Jacquiot
    Cc: Mikael Starvik
    Cc: Jesper Nilsson
    Cc: David Howells
    Cc: Richard Kuo
    Cc: "Luck, Tony"
    Cc: Geert Uytterhoeven
    Cc: Michal Simek
    Cc: Ralf Baechle
    Cc: Jonas Bonn
    Cc: "James E.J. Bottomley"
    Cc: Helge Deller
    Cc: Benjamin Herrenschmidt
    Cc: Paul Mackerras
    Acked-by: Michael Ellerman (powerpc)
    Tested-by: Michael Ellerman (powerpc)
    Cc: Martin Schwidefsky
    Cc: Heiko Carstens
    Cc: Chen Liqin
    Cc: Lennox Wu
    Cc: Chris Metcalf
    Cc: Guan Xuetao
    Cc: Chris Zankel
    Cc: Max Filippov
    Cc: Oleg Nesterov
    Cc: Guenter Roeck
    Signed-off-by: James Hogan
    Signed-off-by: Andrew Morton
    Signed-off-by: Linus Torvalds

    Andy Lutomirski
     

12 Feb, 2015

1 commit

  • LKP has triggered a compiler warning after my recent patch "mm: account
    pmd page tables to the process":

    mm/mmap.c: In function 'exit_mmap':
    >> mm/mmap.c:2857:2: warning: right shift count >= width of type [enabled by default]

    The code:

    > 2857 WARN_ON(mm_nr_pmds(mm) >
    2858 round_up(FIRST_USER_ADDRESS, PUD_SIZE) >> PUD_SHIFT);

    In this, on tile, we have FIRST_USER_ADDRESS defined as 0. round_up() has
    the same type -- int. PUD_SHIFT.

    I think the best way to fix it is to define FIRST_USER_ADDRESS as unsigned
    long. On every arch for consistency.

    Signed-off-by: Kirill A. Shutemov
    Reported-by: Wu Fengguang
    Signed-off-by: Andrew Morton
    Signed-off-by: Linus Torvalds

    Kirill A. Shutemov
     

11 Feb, 2015

1 commit


30 Jan, 2015

1 commit

  • The core VM already knows about VM_FAULT_SIGBUS, but cannot return a
    "you should SIGSEGV" error, because the SIGSEGV case was generally
    handled by the caller - usually the architecture fault handler.

    That results in lots of duplication - all the architecture fault
    handlers end up doing very similar "look up vma, check permissions, do
    retries etc" - but it generally works. However, there are cases where
    the VM actually wants to SIGSEGV, and applications _expect_ SIGSEGV.

    In particular, when accessing the stack guard page, libsigsegv expects a
    SIGSEGV. And it usually got one, because the stack growth is handled by
    that duplicated architecture fault handler.

    However, when the generic VM layer started propagating the error return
    from the stack expansion in commit fee7e49d4514 ("mm: propagate error
    from stack expansion even for guard page"), that now exposed the
    existing VM_FAULT_SIGBUS result to user space. And user space really
    expected SIGSEGV, not SIGBUS.

    To fix that case, we need to add a VM_FAULT_SIGSEGV, and teach all those
    duplicate architecture fault handlers about it. They all already have
    the code to handle SIGSEGV, so it's about just tying that new return
    value to the existing code, but it's all a bit annoying.

    This is the mindless minimal patch to do this. A more extensive patch
    would be to try to gather up the mostly shared fault handling logic into
    one generic helper routine, and long-term we really should do that
    cleanup.

    Just from this patch, you can generally see that most architectures just
    copied (directly or indirectly) the old x86 way of doing things, but in
    the meantime that original x86 model has been improved to hold the VM
    semaphore for shorter times etc and to handle VM_FAULT_RETRY and other
    "newer" things, so it would be a good idea to bring all those
    improvements to the generic case and teach other architectures about
    them too.

    Reported-and-tested-by: Takashi Iwai
    Tested-by: Jan Engelhardt
    Acked-by: Heiko Carstens # "s390 still compiles and boots"
    Cc: linux-arch@vger.kernel.org
    Cc: stable@vger.kernel.org
    Signed-off-by: Linus Torvalds

    Linus Torvalds
     

11 Dec, 2014

1 commit

  • As there are now no remaining users of arch_fast_hash(), lets kill
    it entirely.

    This basically reverts commit 71ae8aac3e19 ("lib: introduce arch
    optimized hash library") and follow-up work, that is f.e., commit
    237217546d44 ("lib: hash: follow-up fixups for arch hash"),
    commit e3fec2f74f7f ("lib: Add missing arch generic-y entries for
    asm-generic/hash.h") and last but not least commit 6a02652df511
    ("perf tools: Fix include for non x86 architectures").

    Cc: Francesco Fusco
    Cc: Thomas Graf
    Cc: Arnaldo Carvalho de Melo
    Signed-off-by: Daniel Borkmann
    Signed-off-by: David S. Miller

    Daniel Borkmann
     

10 Oct, 2014

1 commit


09 Oct, 2014

1 commit

  • Pull timer fixes from Ingo Molnar:
    "Main changes:

    - Fix the deadlock reported by Dave Jones et al
    - Clean up and fix nohz_full interaction with arch abilities
    - nohz init code consolidation/cleanup"

    * 'timers-nohz-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
    nohz: nohz full depends on irq work self IPI support
    nohz: Consolidate nohz full init code
    arm64: Tell irq work about self IPI support
    arm: Tell irq work about self IPI support
    x86: Tell irq work about self IPI support
    irq_work: Force raised irq work to run on irq work interrupt
    irq_work: Introduce arch_irq_work_has_interrupt()
    nohz: Move nohz full init call to tick init

    Linus Torvalds
     

05 Oct, 2014

5 commits

  • The symbol is an orphan, get rid of it.

    Signed-off-by: Richard Weinberger
    Acked-by: Lennox Wu
    Cc: Paul Bolle
    [Guenter Roeck: Merge with 3.17-rc3; update headline]
    Signed-off-by: Guenter Roeck

    Richard Weinberger
     
  • The related error (with allmodconfig under score):

    CC [M] drivers/staging/speakup/speakup_acntpc.o
    In file included from drivers/staging/speakup/speakup_acntpc.c:33:0:
    drivers/staging/speakup/serialio.h:7:24: fatal error: asm/serial.h: No such file or directory
    #include
    ^
    compilation terminated.
    make[3]: *** [drivers/staging/speakup/speakup_acntpc.o] Error 1
    make[2]: *** [drivers/staging/speakup] Error 2
    make[1]: *** [drivers/staging] Error 2
    make: *** [drivers] Error 2

    Acked-by: Lennox Wu
    Signed-off-by: Chen Gang

    Chen Gang
     
  • This patch removes the use of the IRQF_DISABLED flag
    from arch/score/kernel/time.c

    It's a NOOP since 2.6.35 and it will be removed one day.

    Signed-off-by: Michael Opdenacker
    Acked-by: Lennox Wu

    Michael Opdenacker
     
  • The related error (with allmodconfig under score):

    MODPOST 1365 modules
    ERROR: "flush_icache_range" [drivers/misc/lkdtm.ko] undefined!

    Acked-by: Lennox Wu
    Signed-off-by: Chen Gang

    Chen Gang
     
  • 'csum_partial_copy_from_user' and 'flush_dcache_page' are also needed by
    outside modules, so need export them in the related files.

    The related error (with allmodconfig under score):

    MODPOST 1365 modules
    ERROR: "csum_partial_copy_from_user" [net/rxrpc/af-rxrpc.ko] undefined!
    ERROR: "flush_dcache_page" [net/sunrpc/sunrpc.ko] undefined!

    Acked-by: Lennox Wu
    Signed-off-by: Chen Gang

    Chen Gang
     

14 Sep, 2014

1 commit

  • The nohz full code needs irq work to trigger its own interrupt so that
    the subsystem can work even when the tick is stopped.

    Lets introduce arch_irq_work_has_interrupt() that archs can override to
    tell about their support for this ability.

    Signed-off-by: Peter Zijlstra
    Cc: Ingo Molnar
    Cc: Paul E. McKenney
    Cc: Peter Zijlstra
    Cc: Thomas Gleixner
    Signed-off-by: Frederic Weisbecker

    Peter Zijlstra