20 Nov, 2008

1 commit

  • Introduce a new accept4() system call. The addition of this system call
    matches analogous changes in 2.6.27 (dup3(), evenfd2(), signalfd4(),
    inotify_init1(), epoll_create1(), pipe2()) which added new system calls
    that differed from analogous traditional system calls in adding a flags
    argument that can be used to access additional functionality.

    The accept4() system call is exactly the same as accept(), except that
    it adds a flags bit-mask argument. Two flags are initially implemented.
    (Most of the new system calls in 2.6.27 also had both of these flags.)

    SOCK_CLOEXEC causes the close-on-exec (FD_CLOEXEC) flag to be enabled
    for the new file descriptor returned by accept4(). This is a useful
    security feature to avoid leaking information in a multithreaded
    program where one thread is doing an accept() at the same time as
    another thread is doing a fork() plus exec(). More details here:
    http://udrepper.livejournal.com/20407.html "Secure File Descriptor Handling",
    Ulrich Drepper).

    The other flag is SOCK_NONBLOCK, which causes the O_NONBLOCK flag
    to be enabled on the new open file description created by accept4().
    (This flag is merely a convenience, saving the use of additional calls
    fcntl(F_GETFL) and fcntl (F_SETFL) to achieve the same result.

    Here's a test program. Works on x86-32. Should work on x86-64, but
    I (mtk) don't have a system to hand to test with.

    It tests accept4() with each of the four possible combinations of
    SOCK_CLOEXEC and SOCK_NONBLOCK set/clear in 'flags', and verifies
    that the appropriate flags are set on the file descriptor/open file
    description returned by accept4().

    I tested Ulrich's patch in this thread by applying against 2.6.28-rc2,
    and it passes according to my test program.

    /* test_accept4.c

    Copyright (C) 2008, Linux Foundation, written by Michael Kerrisk

    Licensed under the GNU GPLv2 or later.
    */
    #define _GNU_SOURCE
    #include
    #include
    #include
    #include
    #include
    #include
    #include
    #include

    #define PORT_NUM 33333

    #define die(msg) do { perror(msg); exit(EXIT_FAILURE); } while (0)

    /**********************************************************************/

    /* The following is what we need until glibc gets a wrapper for
    accept4() */

    /* Flags for socket(), socketpair(), accept4() */
    #ifndef SOCK_CLOEXEC
    #define SOCK_CLOEXEC O_CLOEXEC
    #endif
    #ifndef SOCK_NONBLOCK
    #define SOCK_NONBLOCK O_NONBLOCK
    #endif

    #ifdef __x86_64__
    #define SYS_accept4 288
    #elif __i386__
    #define USE_SOCKETCALL 1
    #define SYS_ACCEPT4 18
    #else
    #error "Sorry -- don't know the syscall # on this architecture"
    #endif

    static int
    accept4(int fd, struct sockaddr *sockaddr, socklen_t *addrlen, int flags)
    {
    printf("Calling accept4(): flags = %x", flags);
    if (flags != 0) {
    printf(" (");
    if (flags & SOCK_CLOEXEC)
    printf("SOCK_CLOEXEC");
    if ((flags & SOCK_CLOEXEC) && (flags & SOCK_NONBLOCK))
    printf(" ");
    if (flags & SOCK_NONBLOCK)
    printf("SOCK_NONBLOCK");
    printf(")");
    }
    printf("\n");

    #if USE_SOCKETCALL
    long args[6];

    args[0] = fd;
    args[1] = (long) sockaddr;
    args[2] = (long) addrlen;
    args[3] = flags;

    return syscall(SYS_socketcall, SYS_ACCEPT4, args);
    #else
    return syscall(SYS_accept4, fd, sockaddr, addrlen, flags);
    #endif
    }

    /**********************************************************************/

    static int
    do_test(int lfd, struct sockaddr_in *conn_addr,
    int closeonexec_flag, int nonblock_flag)
    {
    int connfd, acceptfd;
    int fdf, flf, fdf_pass, flf_pass;
    struct sockaddr_in claddr;
    socklen_t addrlen;

    printf("=======================================\n");

    connfd = socket(AF_INET, SOCK_STREAM, 0);
    if (connfd == -1)
    die("socket");
    if (connect(connfd, (struct sockaddr *) conn_addr,
    sizeof(struct sockaddr_in)) == -1)
    die("connect");

    addrlen = sizeof(struct sockaddr_in);
    acceptfd = accept4(lfd, (struct sockaddr *) &claddr, &addrlen,
    closeonexec_flag | nonblock_flag);
    if (acceptfd == -1) {
    perror("accept4()");
    close(connfd);
    return 0;
    }

    fdf = fcntl(acceptfd, F_GETFD);
    if (fdf == -1)
    die("fcntl:F_GETFD");
    fdf_pass = ((fdf & FD_CLOEXEC) != 0) ==
    ((closeonexec_flag & SOCK_CLOEXEC) != 0);
    printf("Close-on-exec flag is %sset (%s); ",
    (fdf & FD_CLOEXEC) ? "" : "not ",
    fdf_pass ? "OK" : "failed");

    flf = fcntl(acceptfd, F_GETFL);
    if (flf == -1)
    die("fcntl:F_GETFD");
    flf_pass = ((flf & O_NONBLOCK) != 0) ==
    ((nonblock_flag & SOCK_NONBLOCK) !=0);
    printf("nonblock flag is %sset (%s)\n",
    (flf & O_NONBLOCK) ? "" : "not ",
    flf_pass ? "OK" : "failed");

    close(acceptfd);
    close(connfd);

    printf("Test result: %s\n", (fdf_pass && flf_pass) ? "PASS" : "FAIL");
    return fdf_pass && flf_pass;
    }

    static int
    create_listening_socket(int port_num)
    {
    struct sockaddr_in svaddr;
    int lfd;
    int optval;

    memset(&svaddr, 0, sizeof(struct sockaddr_in));
    svaddr.sin_family = AF_INET;
    svaddr.sin_addr.s_addr = htonl(INADDR_ANY);
    svaddr.sin_port = htons(port_num);

    lfd = socket(AF_INET, SOCK_STREAM, 0);
    if (lfd == -1)
    die("socket");

    optval = 1;
    if (setsockopt(lfd, SOL_SOCKET, SO_REUSEADDR, &optval,
    sizeof(optval)) == -1)
    die("setsockopt");

    if (bind(lfd, (struct sockaddr *) &svaddr,
    sizeof(struct sockaddr_in)) == -1)
    die("bind");

    if (listen(lfd, 5) == -1)
    die("listen");

    return lfd;
    }

    int
    main(int argc, char *argv[])
    {
    struct sockaddr_in conn_addr;
    int lfd;
    int port_num;
    int passed;

    passed = 1;

    port_num = (argc > 1) ? atoi(argv[1]) : PORT_NUM;

    memset(&conn_addr, 0, sizeof(struct sockaddr_in));
    conn_addr.sin_family = AF_INET;
    conn_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
    conn_addr.sin_port = htons(port_num);

    lfd = create_listening_socket(port_num);

    if (!do_test(lfd, &conn_addr, 0, 0))
    passed = 0;
    if (!do_test(lfd, &conn_addr, SOCK_CLOEXEC, 0))
    passed = 0;
    if (!do_test(lfd, &conn_addr, 0, SOCK_NONBLOCK))
    passed = 0;
    if (!do_test(lfd, &conn_addr, SOCK_CLOEXEC, SOCK_NONBLOCK))
    passed = 0;

    close(lfd);

    exit(passed ? EXIT_SUCCESS : EXIT_FAILURE);
    }

    [mtk.manpages@gmail.com: rewrote changelog, updated test program]
    Signed-off-by: Ulrich Drepper
    Tested-by: Michael Kerrisk
    Acked-by: Michael Kerrisk
    Cc:
    Cc:
    Signed-off-by: Andrew Morton
    Signed-off-by: Linus Torvalds

    Ulrich Drepper
     

27 Aug, 2008

1 commit

  • Including in the user-visible part of this header has
    caused build regressions with headers from 2.6.27-rc. Move it down to
    the #ifdef __KERNEL__ part, which is the only place it's needed. Move
    some other kernel-only things down there too, while we're at it.

    Signed-off-by: David Woodhouse
    Signed-off-by: Linus Torvalds

    David Woodhouse
     

26 Jul, 2008

1 commit

  • All ratelimit user use same jiffies and burst params, so some messages
    (callbacks) will be lost.

    For example:
    a call printk_ratelimit(5 * HZ, 1)
    b call printk_ratelimit(5 * HZ, 1) before the 5*HZ timeout of a, then b will
    will be supressed.

    - rewrite __ratelimit, and use a ratelimit_state as parameter. Thanks for
    hints from andrew.

    - Add WARN_ON_RATELIMIT, update rcupreempt.h

    - remove __printk_ratelimit

    - use __ratelimit in net_ratelimit

    Signed-off-by: Dave Young
    Cc: "David S. Miller"
    Cc: "Paul E. McKenney"
    Cc: Dave Young
    Signed-off-by: Andrew Morton
    Signed-off-by: Linus Torvalds

    Dave Young
     

25 Jul, 2008

4 commits

  • This patch introduces support for the SOCK_NONBLOCK flag in socket,
    socketpair, and paccept. To do this the internal function sock_attach_fd
    gets an additional parameter which it uses to set the appropriate flag for
    the file descriptor.

    Given that in modern, scalable programs almost all socket connections are
    non-blocking and the minimal additional cost for the new functionality
    I see no reason not to add this code.

    The following test must be adjusted for architectures other than x86 and
    x86-64 and in case the syscall numbers changed.

    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    #include
    #include
    #include
    #include
    #include
    #include
    #include

    #ifndef __NR_paccept
    # ifdef __x86_64__
    # define __NR_paccept 288
    # elif defined __i386__
    # define SYS_PACCEPT 18
    # define USE_SOCKETCALL 1
    # else
    # error "need __NR_paccept"
    # endif
    #endif

    #ifdef USE_SOCKETCALL
    # define paccept(fd, addr, addrlen, mask, flags) \
    ({ long args[6] = { \
    (long) fd, (long) addr, (long) addrlen, (long) mask, 8, (long) flags }; \
    syscall (__NR_socketcall, SYS_PACCEPT, args); })
    #else
    # define paccept(fd, addr, addrlen, mask, flags) \
    syscall (__NR_paccept, fd, addr, addrlen, mask, 8, flags)
    #endif

    #define PORT 57392

    #define SOCK_NONBLOCK O_NONBLOCK

    static pthread_barrier_t b;

    static void *
    tf (void *arg)
    {
    pthread_barrier_wait (&b);
    int s = socket (AF_INET, SOCK_STREAM, 0);
    struct sockaddr_in sin;
    sin.sin_family = AF_INET;
    sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
    sin.sin_port = htons (PORT);
    connect (s, (const struct sockaddr *) &sin, sizeof (sin));
    close (s);
    pthread_barrier_wait (&b);

    pthread_barrier_wait (&b);
    s = socket (AF_INET, SOCK_STREAM, 0);
    sin.sin_port = htons (PORT);
    connect (s, (const struct sockaddr *) &sin, sizeof (sin));
    close (s);
    pthread_barrier_wait (&b);

    return NULL;
    }

    int
    main (void)
    {
    int fd;
    fd = socket (PF_INET, SOCK_STREAM, 0);
    if (fd == -1)
    {
    puts ("socket(0) failed");
    return 1;
    }
    int fl = fcntl (fd, F_GETFL);
    if (fl == -1)
    {
    puts ("fcntl failed");
    return 1;
    }
    if (fl & O_NONBLOCK)
    {
    puts ("socket(0) set non-blocking mode");
    return 1;
    }
    close (fd);

    fd = socket (PF_INET, SOCK_STREAM|SOCK_NONBLOCK, 0);
    if (fd == -1)
    {
    puts ("socket(SOCK_NONBLOCK) failed");
    return 1;
    }
    fl = fcntl (fd, F_GETFL);
    if (fl == -1)
    {
    puts ("fcntl failed");
    return 1;
    }
    if ((fl & O_NONBLOCK) == 0)
    {
    puts ("socket(SOCK_NONBLOCK) does not set non-blocking mode");
    return 1;
    }
    close (fd);

    int fds[2];
    if (socketpair (PF_UNIX, SOCK_STREAM, 0, fds) == -1)
    {
    puts ("socketpair(0) failed");
    return 1;
    }
    for (int i = 0; i < 2; ++i)
    {
    fl = fcntl (fds[i], F_GETFL);
    if (fl == -1)
    {
    puts ("fcntl failed");
    return 1;
    }
    if (fl & O_NONBLOCK)
    {
    printf ("socketpair(0) set non-blocking mode for fds[%d]\n", i);
    return 1;
    }
    close (fds[i]);
    }

    if (socketpair (PF_UNIX, SOCK_STREAM|SOCK_NONBLOCK, 0, fds) == -1)
    {
    puts ("socketpair(SOCK_NONBLOCK) failed");
    return 1;
    }
    for (int i = 0; i < 2; ++i)
    {
    fl = fcntl (fds[i], F_GETFL);
    if (fl == -1)
    {
    puts ("fcntl failed");
    return 1;
    }
    if ((fl & O_NONBLOCK) == 0)
    {
    printf ("socketpair(SOCK_NONBLOCK) does not set non-blocking mode for fds[%d]\n", i);
    return 1;
    }
    close (fds[i]);
    }

    pthread_barrier_init (&b, NULL, 2);

    struct sockaddr_in sin;
    pthread_t th;
    if (pthread_create (&th, NULL, tf, NULL) != 0)
    {
    puts ("pthread_create failed");
    return 1;
    }

    int s = socket (AF_INET, SOCK_STREAM, 0);
    int reuse = 1;
    setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof (reuse));
    sin.sin_family = AF_INET;
    sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
    sin.sin_port = htons (PORT);
    bind (s, (struct sockaddr *) &sin, sizeof (sin));
    listen (s, SOMAXCONN);

    pthread_barrier_wait (&b);

    int s2 = paccept (s, NULL, 0, NULL, 0);
    if (s2 < 0)
    {
    puts ("paccept(0) failed");
    return 1;
    }

    fl = fcntl (s2, F_GETFL);
    if (fl & O_NONBLOCK)
    {
    puts ("paccept(0) set non-blocking mode");
    return 1;
    }
    close (s2);
    close (s);

    pthread_barrier_wait (&b);

    s = socket (AF_INET, SOCK_STREAM, 0);
    sin.sin_port = htons (PORT);
    setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof (reuse));
    bind (s, (struct sockaddr *) &sin, sizeof (sin));
    listen (s, SOMAXCONN);

    pthread_barrier_wait (&b);

    s2 = paccept (s, NULL, 0, NULL, SOCK_NONBLOCK);
    if (s2 < 0)
    {
    puts ("paccept(SOCK_NONBLOCK) failed");
    return 1;
    }

    fl = fcntl (s2, F_GETFL);
    if ((fl & O_NONBLOCK) == 0)
    {
    puts ("paccept(SOCK_NONBLOCK) does not set non-blocking mode");
    return 1;
    }
    close (s2);
    close (s);

    pthread_barrier_wait (&b);
    puts ("OK");

    return 0;
    }
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    Signed-off-by: Ulrich Drepper
    Acked-by: Davide Libenzi
    Cc: Michael Kerrisk
    Cc: "David S. Miller"
    Signed-off-by: Andrew Morton
    Signed-off-by: Linus Torvalds

    Ulrich Drepper
     
  • Some platforms do not have support to restore the signal mask in the
    return path from a syscall. For those platforms syscalls like pselect are
    not defined at all. This is, I think, not a good choice for paccept()
    since paccept() adds more value on top of accept() than just the signal
    mask handling.

    Therefore this patch defines a scaled down version of the sys_paccept
    function for those platforms. It returns -EINVAL in case the signal mask
    is non-NULL but behaves the same otherwise.

    Note that I explicitly included . I saw that it is
    currently included but indirectly two levels down. There is too much risk
    in relying on this. The header might change and then suddenly the
    function definition would change without anyone immediately noticing.

    Signed-off-by: Ulrich Drepper
    Cc: Davide Libenzi
    Cc: Michael Kerrisk
    Signed-off-by: Andrew Morton
    Signed-off-by: Linus Torvalds

    Ulrich Drepper
     
  • This patch is by far the most complex in the series. It adds a new syscall
    paccept. This syscall differs from accept in that it adds (at the userlevel)
    two additional parameters:

    - a signal mask
    - a flags value

    The flags parameter can be used to set flag like SOCK_CLOEXEC. This is
    imlpemented here as well. Some people argued that this is a property which
    should be inherited from the file desriptor for the server but this is against
    POSIX. Additionally, we really want the signal mask parameter as well
    (similar to pselect, ppoll, etc). So an interface change in inevitable.

    The flag value is the same as for socket and socketpair. I think diverging
    here will only create confusion. Similar to the filesystem interfaces where
    the use of the O_* constants differs, it is acceptable here.

    The signal mask is handled as for pselect etc. The mask is temporarily
    installed for the thread and removed before the call returns. I modeled the
    code after pselect. If there is a problem it's likely also in pselect.

    For architectures which use socketcall I maintained this interface instead of
    adding a system call. The symmetry shouldn't be broken.

    The following test must be adjusted for architectures other than x86 and
    x86-64 and in case the syscall numbers changed.

    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    #include
    #include
    #include
    #include
    #include
    #include
    #include
    #include
    #include

    #ifndef __NR_paccept
    # ifdef __x86_64__
    # define __NR_paccept 288
    # elif defined __i386__
    # define SYS_PACCEPT 18
    # define USE_SOCKETCALL 1
    # else
    # error "need __NR_paccept"
    # endif
    #endif

    #ifdef USE_SOCKETCALL
    # define paccept(fd, addr, addrlen, mask, flags) \
    ({ long args[6] = { \
    (long) fd, (long) addr, (long) addrlen, (long) mask, 8, (long) flags }; \
    syscall (__NR_socketcall, SYS_PACCEPT, args); })
    #else
    # define paccept(fd, addr, addrlen, mask, flags) \
    syscall (__NR_paccept, fd, addr, addrlen, mask, 8, flags)
    #endif

    #define PORT 57392

    #define SOCK_CLOEXEC O_CLOEXEC

    static pthread_barrier_t b;

    static void *
    tf (void *arg)
    {
    pthread_barrier_wait (&b);
    int s = socket (AF_INET, SOCK_STREAM, 0);
    struct sockaddr_in sin;
    sin.sin_family = AF_INET;
    sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
    sin.sin_port = htons (PORT);
    connect (s, (const struct sockaddr *) &sin, sizeof (sin));
    close (s);

    pthread_barrier_wait (&b);
    s = socket (AF_INET, SOCK_STREAM, 0);
    sin.sin_port = htons (PORT);
    connect (s, (const struct sockaddr *) &sin, sizeof (sin));
    close (s);
    pthread_barrier_wait (&b);

    pthread_barrier_wait (&b);
    sleep (2);
    pthread_kill ((pthread_t) arg, SIGUSR1);

    return NULL;
    }

    static void
    handler (int s)
    {
    }

    int
    main (void)
    {
    pthread_barrier_init (&b, NULL, 2);

    struct sockaddr_in sin;
    pthread_t th;
    if (pthread_create (&th, NULL, tf, (void *) pthread_self ()) != 0)
    {
    puts ("pthread_create failed");
    return 1;
    }

    int s = socket (AF_INET, SOCK_STREAM, 0);
    int reuse = 1;
    setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof (reuse));
    sin.sin_family = AF_INET;
    sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
    sin.sin_port = htons (PORT);
    bind (s, (struct sockaddr *) &sin, sizeof (sin));
    listen (s, SOMAXCONN);

    pthread_barrier_wait (&b);

    int s2 = paccept (s, NULL, 0, NULL, 0);
    if (s2 < 0)
    {
    puts ("paccept(0) failed");
    return 1;
    }

    int coe = fcntl (s2, F_GETFD);
    if (coe & FD_CLOEXEC)
    {
    puts ("paccept(0) set close-on-exec-flag");
    return 1;
    }
    close (s2);

    pthread_barrier_wait (&b);

    s2 = paccept (s, NULL, 0, NULL, SOCK_CLOEXEC);
    if (s2 < 0)
    {
    puts ("paccept(SOCK_CLOEXEC) failed");
    return 1;
    }

    coe = fcntl (s2, F_GETFD);
    if ((coe & FD_CLOEXEC) == 0)
    {
    puts ("paccept(SOCK_CLOEXEC) does not set close-on-exec flag");
    return 1;
    }
    close (s2);

    pthread_barrier_wait (&b);

    struct sigaction sa;
    sa.sa_handler = handler;
    sa.sa_flags = 0;
    sigemptyset (&sa.sa_mask);
    sigaction (SIGUSR1, &sa, NULL);

    sigset_t ss;
    pthread_sigmask (SIG_SETMASK, NULL, &ss);
    sigaddset (&ss, SIGUSR1);
    pthread_sigmask (SIG_SETMASK, &ss, NULL);

    sigdelset (&ss, SIGUSR1);
    alarm (4);
    pthread_barrier_wait (&b);

    errno = 0 ;
    s2 = paccept (s, NULL, 0, &ss, 0);
    if (s2 != -1 || errno != EINTR)
    {
    puts ("paccept did not fail with EINTR");
    return 1;
    }

    close (s);

    puts ("OK");

    return 0;
    }
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    [akpm@linux-foundation.org: make it compile]
    [akpm@linux-foundation.org: add sys_ni stub]
    Signed-off-by: Ulrich Drepper
    Acked-by: Davide Libenzi
    Cc: Michael Kerrisk
    Cc:
    Cc: "David S. Miller"
    Cc: Roland McGrath
    Cc: Kyle McMartin
    Signed-off-by: Andrew Morton
    Signed-off-by: Linus Torvalds

    Ulrich Drepper
     
  • This patch adds support for flag values which are ORed to the type passwd
    to socket and socketpair. The additional code is minimal. The flag
    values in this implementation can and must match the O_* flags. This
    avoids overhead in the conversion.

    The internal functions sock_alloc_fd and sock_map_fd get a new parameters
    and all callers are changed.

    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    #include
    #include
    #include
    #include
    #include

    #define PORT 57392

    /* For Linux these must be the same. */
    #define SOCK_CLOEXEC O_CLOEXEC

    int
    main (void)
    {
    int fd;
    fd = socket (PF_INET, SOCK_STREAM, 0);
    if (fd == -1)
    {
    puts ("socket(0) failed");
    return 1;
    }
    int coe = fcntl (fd, F_GETFD);
    if (coe == -1)
    {
    puts ("fcntl failed");
    return 1;
    }
    if (coe & FD_CLOEXEC)
    {
    puts ("socket(0) set close-on-exec flag");
    return 1;
    }
    close (fd);

    fd = socket (PF_INET, SOCK_STREAM|SOCK_CLOEXEC, 0);
    if (fd == -1)
    {
    puts ("socket(SOCK_CLOEXEC) failed");
    return 1;
    }
    coe = fcntl (fd, F_GETFD);
    if (coe == -1)
    {
    puts ("fcntl failed");
    return 1;
    }
    if ((coe & FD_CLOEXEC) == 0)
    {
    puts ("socket(SOCK_CLOEXEC) does not set close-on-exec flag");
    return 1;
    }
    close (fd);

    int fds[2];
    if (socketpair (PF_UNIX, SOCK_STREAM, 0, fds) == -1)
    {
    puts ("socketpair(0) failed");
    return 1;
    }
    for (int i = 0; i < 2; ++i)
    {
    coe = fcntl (fds[i], F_GETFD);
    if (coe == -1)
    {
    puts ("fcntl failed");
    return 1;
    }
    if (coe & FD_CLOEXEC)
    {
    printf ("socketpair(0) set close-on-exec flag for fds[%d]\n", i);
    return 1;
    }
    close (fds[i]);
    }

    if (socketpair (PF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0, fds) == -1)
    {
    puts ("socketpair(SOCK_CLOEXEC) failed");
    return 1;
    }
    for (int i = 0; i < 2; ++i)
    {
    coe = fcntl (fds[i], F_GETFD);
    if (coe == -1)
    {
    puts ("fcntl failed");
    return 1;
    }
    if ((coe & FD_CLOEXEC) == 0)
    {
    printf ("socketpair(SOCK_CLOEXEC) does not set close-on-exec flag for fds[%d]\n", i);
    return 1;
    }
    close (fds[i]);
    }

    puts ("OK");

    return 0;
    }
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    Signed-off-by: Ulrich Drepper
    Acked-by: Davide Libenzi
    Cc: Michael Kerrisk
    Cc: "David S. Miller"
    Cc: Ralf Baechle
    Signed-off-by: Andrew Morton
    Signed-off-by: Linus Torvalds

    Ulrich Drepper
     

08 Jul, 2008

1 commit


23 Mar, 2008

1 commit


29 Jan, 2008

3 commits

  • I have removed all the entries from this table (core_table,
    ipv4_table and tr_table), so now we can safely drop it.

    Signed-off-by: Pavel Emelyanov
    Signed-off-by: David S. Miller

    Pavel Emelyanov
     
  • The sock_wake_async() performs a bit different actions
    depending on "how" argument. Unfortunately this argument
    ony has numerical magic values.

    I propose to give names to their constants to help people
    reading this function callers understand what's going on
    without looking into this function all the time.

    I suppose this is 2.6.25 material, but if it's not (or the
    naming seems poor/bad/awful), I can rework it against the
    current net-2.6 tree.

    Signed-off-by: Pavel Emelyanov
    Signed-off-by: Herbert Xu
    Signed-off-by: David S. Miller

    Pavel Emelyanov
     
  • Support for network splice receive.

    Signed-off-by: Jens Axboe
    Signed-off-by: David S. Miller

    Jens Axboe
     

13 Nov, 2007

1 commit

  • ...and fix a couple of bugs in the NBD, CIFS and OCFS2 socket handlers.

    Looking at the sock->op->shutdown() handlers, it looks as if all of them
    take a SHUT_RD/SHUT_WR/SHUT_RDWR argument instead of the
    RCV_SHUTDOWN/SEND_SHUTDOWN arguments.
    Add a helper, and then define the SHUT_* enum to ensure that kernel users
    of shutdown() don't get confused.

    Signed-off-by: Trond Myklebust
    Acked-by: Mark Fasheh
    Acked-by: David Howells
    Signed-off-by: David S. Miller

    Trond Myklebust
     

22 Oct, 2007

1 commit


11 Oct, 2007

1 commit

  • This patch passes in the namespace a new socket should be created in
    and has the socket code do the appropriate reference counting. By
    virtue of this all socket create methods are touched. In addition
    the socket create methods are modified so that they will fail if
    you attempt to create a socket in a non-default network namespace.

    Failing if we attempt to create a socket outside of the default
    network namespace ensures that as we incrementally make the network stack
    network namespace aware we will not export functionality that someone
    has not audited and made certain is network namespace safe.
    Allowing us to partially enable network namespaces before all of the
    exotic protocols are supported.

    Any protocol layers I have missed will fail to compile because I now
    pass an extra parameter into the socket creation code.

    [ Integrated AF_IUCV build fixes from Andrew Morton... -DaveM ]

    Signed-off-by: Eric W. Biederman
    Signed-off-by: David S. Miller

    Eric W. Biederman
     

27 Apr, 2007

1 commit

  • Provide AF_RXRPC sockets that can be used to talk to AFS servers, or serve
    answers to AFS clients. KerberosIV security is fully supported. The patches
    and some example test programs can be found in:

    http://people.redhat.com/~dhowells/rxrpc/

    This will eventually replace the old implementation of kernel-only RxRPC
    currently resident in net/rxrpc/.

    Signed-off-by: David Howells
    Signed-off-by: David S. Miller

    David Howells
     

09 Feb, 2007

1 commit


01 Jan, 2007

1 commit


03 Dec, 2006

1 commit


17 Oct, 2006

1 commit

  • Make net_random() more widely available by calling it random32

    akpm: hopefully this will permit the removal of carta_random32. That needs
    confirmation from Stephane - this code looks somewhat more computationally
    expensive, and has a different (ie: callee-stateful) interface.

    [akpm@osdl.org: lots of build fixes, cleanups]
    Signed-off-by: Stephen Hemminger
    Signed-off-by: David S. Miller
    Cc: Stephane Eranian
    Signed-off-by: Andrew Morton
    Signed-off-by: Linus Torvalds

    Stephen Hemminger
     

23 Sep, 2006

3 commits


30 Jun, 2006

1 commit

  • This patch implements an API whereby an application can determine the
    label of its peer's Unix datagram sockets via the auxiliary data mechanism of
    recvmsg.

    Patch purpose:

    This patch enables a security-aware application to retrieve the
    security context of the peer of a Unix datagram socket. The application
    can then use this security context to determine the security context for
    processing on behalf of the peer who sent the packet.

    Patch design and implementation:

    The design and implementation is very similar to the UDP case for INET
    sockets. Basically we build upon the existing Unix domain socket API for
    retrieving user credentials. Linux offers the API for obtaining user
    credentials via ancillary messages (i.e., out of band/control messages
    that are bundled together with a normal message). To retrieve the security
    context, the application first indicates to the kernel such desire by
    setting the SO_PASSSEC option via getsockopt. Then the application
    retrieves the security context using the auxiliary data mechanism.

    An example server application for Unix datagram socket should look like this:

    toggle = 1;
    toggle_len = sizeof(toggle);

    setsockopt(sockfd, SOL_SOCKET, SO_PASSSEC, &toggle, &toggle_len);
    recvmsg(sockfd, &msg_hdr, 0);
    if (msg_hdr.msg_controllen > sizeof(struct cmsghdr)) {
    cmsg_hdr = CMSG_FIRSTHDR(&msg_hdr);
    if (cmsg_hdr->cmsg_len cmsg_level == SOL_SOCKET &&
    cmsg_hdr->cmsg_type == SCM_SECURITY) {
    memcpy(&scontext, CMSG_DATA(cmsg_hdr), sizeof(scontext));
    }
    }

    sock_setsockopt is enhanced with a new socket option SOCK_PASSSEC to allow
    a server socket to receive security context of the peer.

    Testing:

    We have tested the patch by setting up Unix datagram client and server
    applications. We verified that the server can retrieve the security context
    using the auxiliary data mechanism of recvmsg.

    Signed-off-by: Catherine Zhang
    Acked-by: Acked-by: James Morris
    Signed-off-by: David S. Miller

    Catherine Zhang
     

26 Apr, 2006

1 commit


25 Apr, 2006

1 commit


22 Mar, 2006

1 commit


21 Mar, 2006

1 commit


04 Jan, 2006

1 commit

  • I noticed that some of 'struct proto_ops' used in the kernel may share
    a cache line used by locks or other heavily modified data. (default
    linker alignement is 32 bytes, and L1_CACHE_LINE is 64 or 128 at
    least)

    This patch makes sure a 'struct proto_ops' can be declared as const,
    so that all cpus can share all parts of it without false sharing.

    This is not mandatory : a driver can still use a read/write structure
    if it needs to (and eventually a __read_mostly)

    I made a global stubstitute to change all existing occurences to make
    them const.

    This should reduce the possibility of false sharing on SMP, and
    speedup some socket system calls.

    Signed-off-by: Eric Dumazet
    Signed-off-by: David S. Miller

    Eric Dumazet
     

07 Nov, 2005

1 commit

  • Fix various warnings in kernel-doc:

    Warning(linux-2614-rc4//include/linux/net.h:89): Enum value 'SOCK_DCCP' not described in enum 'sock_type'

    usercopy.c: should use !E instead of !I for exported symbols:
    Warning(linux-2614-rc4//arch/i386/lib/usercopy.c): no structured comments found

    fs.h does not need to use !E since it has no exported symbols:
    Warning(linux-2614-rc4//include/linux/fs.h:1182): No description found for parameter 'find_exported_dentry'
    Warning(linux-2614-rc4//include/linux/fs.h): no structured comments found

    irq/manage.c should use !E for its exported symbols:
    Warning(linux-2614-rc4//kernel/irq/manage.c): no structured comments found

    macmodes.c should use !E for its exported symbols:
    Warning(linux-2614-rc4//drivers/video/macmodes.c): no structured comments found

    Signed-off-by: Randy Dunlap
    Signed-off-by: Andrew Morton
    Signed-off-by: Linus Torvalds

    Randy Dunlap
     

30 Aug, 2005

3 commits

  • Of this type, mostly:

    CHECK net/ipv6/netfilter.c
    net/ipv6/netfilter.c:96:12: warning: symbol 'ipv6_netfilter_init' was not declared. Should it be static?
    net/ipv6/netfilter.c:101:6: warning: symbol 'ipv6_netfilter_fini' was not declared. Should it be static?

    Signed-off-by: Arnaldo Carvalho de Melo
    Signed-off-by: David S. Miller

    Arnaldo Carvalho de Melo
     
  • Development to this point was done on a subversion repository at:

    http://oops.ghostprotocols.net:81/cgi-bin/viewcvs.cgi/dccp-2.6/

    This repository will be kept at this site for the foreseable future,
    so that interested parties can see the history of this code,
    attributions, etc.

    If I ever decide to take this offline I'll provide the full history at
    some other suitable place.

    Signed-off-by: Arnaldo Carvalho de Melo
    Signed-off-by: David S. Miller

    Arnaldo Carvalho de Melo
     
  • - Remove bogus code for compiling netlink as module
    - Add module refcounting support for modules implementing a netlink
    protocol
    - Add support for autoloading modules that implement a netlink protocol
    as soon as someone opens a socket for that protocol

    Signed-off-by: Harald Welte
    Signed-off-by: David S. Miller

    Harald Welte
     

24 May, 2005

1 commit


06 May, 2005

1 commit


01 May, 2005

1 commit

  • I have recompiled Linux kernel 2.6.11.5 documentation for me and our
    university students again. The documentation could be extended for more
    sources which are equipped by structured comments for recent 2.6 kernels. I
    have tried to proceed with that task. I have done that more times from 2.6.0
    time and it gets boring to do same changes again and again. Linux kernel
    compiles after changes for i386 and ARM targets. I have added references to
    some more files into kernel-api book, I have added some section names as well.
    So please, check that changes do not break something and that categories are
    not too much skewed.

    I have changed kernel-doc to accept "fastcall" and "asmlinkage" words reserved
    by kernel convention. Most of the other changes are modifications in the
    comments to make kernel-doc happy, accept some parameters description and do
    not bail out on errors. Changed to @pid in the description, moved some
    #ifdef before comments to correct function to comments bindings, etc.

    You can see result of the modified documentation build at
    http://cmp.felk.cvut.cz/~pisa/linux/lkdb-2.6.11.tar.gz

    Some more sources are ready to be included into kernel-doc generated
    documentation. Sources has been added into kernel-api for now. Some more
    section names added and probably some more chaos introduced as result of quick
    cleanup work.

    Signed-off-by: Pavel Pisa
    Signed-off-by: Martin Waitz
    Signed-off-by: Andrew Morton
    Signed-off-by: Linus Torvalds

    Pavel Pisa
     

17 Apr, 2005

1 commit

  • Initial git repository build. I'm not bothering with the full history,
    even though we have it. We can create a separate "historical" git
    archive of that later if we want to, and in the meantime it's about
    3.2GB when imported into git - space that would just make the early
    git days unnecessarily complicated, when we don't have a lot of good
    infrastructure for it.

    Let it rip!

    Linus Torvalds