Eric Lee / smarc-fsl-linux-kernel

25 Jul, 2008

27 commits

9fe5ad9c8 flag parameters add-on: remove epoll_create size param ... Browse Code »

Remove the size parameter from the new epoll_create syscall and renames the
syscall itself. The updated test program follows.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include
#include
#include
#include
#include

#ifndef __NR_epoll_create2
# ifdef __x86_64__
# define __NR_epoll_create2 291
# elif defined __i386__
# define __NR_epoll_create2 329
# else
# error "need __NR_epoll_create2"
# endif
#endif

#define EPOLL_CLOEXEC O_CLOEXEC

int
main (void)
{
int fd = syscall (__NR_epoll_create2, 0);
if (fd == -1)
{
puts ("epoll_create2(0) failed");
return 1;
}
int coe = fcntl (fd, F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if (coe & FD_CLOEXEC)
{
puts ("epoll_create2(0) set close-on-exec flag");
return 1;
}
close (fd);

fd = syscall (__NR_epoll_create2, EPOLL_CLOEXEC);
if (fd == -1)
{
puts ("epoll_create2(EPOLL_CLOEXEC) failed");
return 1;
}
coe = fcntl (fd, F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if ((coe & FD_CLOEXEC) == 0)
{
puts ("epoll_create2(EPOLL_CLOEXEC) set close-on-exec flag");
return 1;
}
close (fd);

puts ("OK");

return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Ulrich Drepper
Acked-by: Davide Libenzi
Cc: Michael Kerrisk
Cc:
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds

Ulrich Drepper
2008-07-25 01:47:29 +0800
e38b36f32 flag parameters: check magic constants ... Browse Code »

This patch adds test that ensure the boundary conditions for the various
constants introduced in the previous patches is met. No code is generated.

[akpm@linux-foundation.org: fix alpha]
Signed-off-by: Ulrich Drepper
Acked-by: Davide Libenzi
Cc: Michael Kerrisk
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds

Ulrich Drepper
2008-07-25 01:47:29 +0800
510df2dd4 flag parameters: NONBLOCK in inotify_init ... Browse Code »

This patch adds non-blocking support for inotify_init1. The
additional changes needed are minimal.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include
#include
#include
#include

#ifndef __NR_inotify_init1
# ifdef __x86_64__
# define __NR_inotify_init1 294
# elif defined __i386__
# define __NR_inotify_init1 332
# else
# error "need __NR_inotify_init1"
# endif
#endif

#define IN_NONBLOCK O_NONBLOCK

int
main (void)
{
int fd = syscall (__NR_inotify_init1, 0);
if (fd == -1)
{
puts ("inotify_init1(0) failed");
return 1;
}
int fl = fcntl (fd, F_GETFL);
if (fl == -1)
{
puts ("fcntl failed");
return 1;
}
if (fl & O_NONBLOCK)
{
puts ("inotify_init1(0) set non-blocking mode");
return 1;
}
close (fd);

fd = syscall (__NR_inotify_init1, IN_NONBLOCK);
if (fd == -1)
{
puts ("inotify_init1(IN_NONBLOCK) failed");
return 1;
}
fl = fcntl (fd, F_GETFL);
if (fl == -1)
{
puts ("fcntl failed");
return 1;
}
if ((fl & O_NONBLOCK) == 0)
{
puts ("inotify_init1(IN_NONBLOCK) set non-blocking mode");
return 1;
}
close (fd);

puts ("OK");

return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Ulrich Drepper
Acked-by: Davide Libenzi
Cc: Michael Kerrisk
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds

Ulrich Drepper
2008-07-25 01:47:29 +0800
be61a86d7 flag parameters: NONBLOCK in pipe ... Browse Code »

This patch adds O_NONBLOCK support to pipe2. It is minimally more involved
than the patches for eventfd et.al but still trivial. The interfaces of the
create_write_pipe and create_read_pipe helper functions were changed and the
one other caller as well.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include
#include
#include
#include

#ifndef __NR_pipe2
# ifdef __x86_64__
# define __NR_pipe2 293
# elif defined __i386__
# define __NR_pipe2 331
# else
# error "need __NR_pipe2"
# endif
#endif

int
main (void)
{
int fds[2];
if (syscall (__NR_pipe2, fds, 0) == -1)
{
puts ("pipe2(0) failed");
return 1;
}
for (int i = 0; i < 2; ++i)
{
int fl = fcntl (fds[i], F_GETFL);
if (fl == -1)
{
puts ("fcntl failed");
return 1;
}
if (fl & O_NONBLOCK)
{
printf ("pipe2(0) set non-blocking mode for fds[%d]\n", i);
return 1;
}
close (fds[i]);
}

if (syscall (__NR_pipe2, fds, O_NONBLOCK) == -1)
{
puts ("pipe2(O_NONBLOCK) failed");
return 1;
}
for (int i = 0; i < 2; ++i)
{
int fl = fcntl (fds[i], F_GETFL);
if (fl == -1)
{
puts ("fcntl failed");
return 1;
}
if ((fl & O_NONBLOCK) == 0)
{
printf ("pipe2(O_NONBLOCK) does not set non-blocking mode for fds[%d]\n", i);
return 1;
}
close (fds[i]);
}

puts ("OK");

return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Ulrich Drepper
Acked-by: Davide Libenzi
Cc: Michael Kerrisk
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds

Ulrich Drepper
2008-07-25 01:47:29 +0800
6b1ef0e60 flag parameters: NONBLOCK in timerfd_create ... Browse Code »

This patch adds support for the TFD_NONBLOCK flag to timerfd_create. The
additional changes needed are minimal.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include
#include
#include
#include
#include

#ifndef __NR_timerfd_create
# ifdef __x86_64__
# define __NR_timerfd_create 283
# elif defined __i386__
# define __NR_timerfd_create 322
# else
# error "need __NR_timerfd_create"
# endif
#endif

#define TFD_NONBLOCK O_NONBLOCK

int
main (void)
{
int fd = syscall (__NR_timerfd_create, CLOCK_REALTIME, 0);
if (fd == -1)
{
puts ("timerfd_create(0) failed");
return 1;
}
int fl = fcntl (fd, F_GETFL);
if (fl == -1)
{
puts ("fcntl failed");
return 1;
}
if (fl & O_NONBLOCK)
{
puts ("timerfd_create(0) set non-blocking mode");
return 1;
}
close (fd);

fd = syscall (__NR_timerfd_create, CLOCK_REALTIME, TFD_NONBLOCK);
if (fd == -1)
{
puts ("timerfd_create(TFD_NONBLOCK) failed");
return 1;
}
fl = fcntl (fd, F_GETFL);
if (fl == -1)
{
puts ("fcntl failed");
return 1;
}
if ((fl & O_NONBLOCK) == 0)
{
puts ("timerfd_create(TFD_NONBLOCK) set non-blocking mode");
return 1;
}
close (fd);

puts ("OK");

return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Ulrich Drepper
Acked-by: Davide Libenzi
Cc: Michael Kerrisk
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds

Ulrich Drepper
2008-07-25 01:47:29 +0800
e7d476dfd flag parameters: NONBLOCK in eventfd ... Browse Code »

This patch adds support for the EFD_NONBLOCK flag to eventfd2. The
additional changes needed are minimal.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include
#include
#include
#include

#ifndef __NR_eventfd2
# ifdef __x86_64__
# define __NR_eventfd2 290
# elif defined __i386__
# define __NR_eventfd2 328
# else
# error "need __NR_eventfd2"
# endif
#endif

#define EFD_NONBLOCK O_NONBLOCK

int
main (void)
{
int fd = syscall (__NR_eventfd2, 1, 0);
if (fd == -1)
{
puts ("eventfd2(0) failed");
return 1;
}
int fl = fcntl (fd, F_GETFL);
if (fl == -1)
{
puts ("fcntl failed");
return 1;
}
if (fl & O_NONBLOCK)
{
puts ("eventfd2(0) sets non-blocking mode");
return 1;
}
close (fd);

fd = syscall (__NR_eventfd2, 1, EFD_NONBLOCK);
if (fd == -1)
{
puts ("eventfd2(EFD_NONBLOCK) failed");
return 1;
}
fl = fcntl (fd, F_GETFL);
if (fl == -1)
{
puts ("fcntl failed");
return 1;
}
if ((fl & O_NONBLOCK) == 0)
{
puts ("eventfd2(EFD_NONBLOCK) does not set non-blocking mode");
return 1;
}
close (fd);

puts ("OK");

return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Ulrich Drepper
Acked-by: Davide Libenzi
Cc: Michael Kerrisk
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds

Ulrich Drepper
2008-07-25 01:47:29 +0800
5fb5e0492 flag parameters: NONBLOCK in signalfd ... Browse Code »

This patch adds support for the SFD_NONBLOCK flag to signalfd4. The
additional changes needed are minimal.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include
#include
#include
#include
#include

#ifndef __NR_signalfd4
# ifdef __x86_64__
# define __NR_signalfd4 289
# elif defined __i386__
# define __NR_signalfd4 327
# else
# error "need __NR_signalfd4"
# endif
#endif

#define SFD_NONBLOCK O_NONBLOCK

int
main (void)
{
sigset_t ss;
sigemptyset (&ss);
sigaddset (&ss, SIGUSR1);
int fd = syscall (__NR_signalfd4, -1, &ss, 8, 0);
if (fd == -1)
{
puts ("signalfd4(0) failed");
return 1;
}
int fl = fcntl (fd, F_GETFL);
if (fl == -1)
{
puts ("fcntl failed");
return 1;
}
if (fl & O_NONBLOCK)
{
puts ("signalfd4(0) set non-blocking mode");
return 1;
}
close (fd);

fd = syscall (__NR_signalfd4, -1, &ss, 8, SFD_NONBLOCK);
if (fd == -1)
{
puts ("signalfd4(SFD_NONBLOCK) failed");
return 1;
}
fl = fcntl (fd, F_GETFL);
if (fl == -1)
{
puts ("fcntl failed");
return 1;
}
if ((fl & O_NONBLOCK) == 0)
{
puts ("signalfd4(SFD_NONBLOCK) does not set non-blocking mode");
return 1;
}
close (fd);

puts ("OK");

return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Ulrich Drepper
Acked-by: Davide Libenzi
Cc: Michael Kerrisk
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds

Ulrich Drepper
2008-07-25 01:47:29 +0800
99829b832 flag parameters: NONBLOCK in anon_inode_getfd ... Browse Code »

Building on the previous change to anon_inode_getfd, this patch introduces
support for handling of O_NONBLOCK in addition to the already supported
O_CLOEXEC. Following patches will take advantage of this support. As can be
seen, the additional support for supporting this functionality is minimal.

Signed-off-by: Ulrich Drepper
Acked-by: Davide Libenzi
Cc: Michael Kerrisk
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds

Ulrich Drepper
2008-07-25 01:47:28 +0800
4006553b0 flag parameters: inotify_init ... Browse Code »

This patch introduces the new syscall inotify_init1 (note: the 1 stands for
the one parameter the syscall takes, as opposed to no parameter before). The
values accepted for this parameter are function-specific and defined in the
inotify.h header. Here the values must match the O_* flags, though. In this
patch CLOEXEC support is introduced.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include
#include
#include
#include

#ifndef __NR_inotify_init1
# ifdef __x86_64__
# define __NR_inotify_init1 294
# elif defined __i386__
# define __NR_inotify_init1 332
# else
# error "need __NR_inotify_init1"
# endif
#endif

#define IN_CLOEXEC O_CLOEXEC

int
main (void)
{
int fd;
fd = syscall (__NR_inotify_init1, 0);
if (fd == -1)
{
puts ("inotify_init1(0) failed");
return 1;
}
int coe = fcntl (fd, F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if (coe & FD_CLOEXEC)
{
puts ("inotify_init1(0) set close-on-exit");
return 1;
}
close (fd);

fd = syscall (__NR_inotify_init1, IN_CLOEXEC);
if (fd == -1)
{
puts ("inotify_init1(IN_CLOEXEC) failed");
return 1;
}
coe = fcntl (fd, F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if ((coe & FD_CLOEXEC) == 0)
{
puts ("inotify_init1(O_CLOEXEC) does not set close-on-exit");
return 1;
}
close (fd);

puts ("OK");

return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

[akpm@linux-foundation.org: add sys_ni stub]
Signed-off-by: Ulrich Drepper
Acked-by: Davide Libenzi
Cc: Michael Kerrisk
Cc:
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds

Ulrich Drepper
2008-07-25 01:47:28 +0800
ed8cae8ba flag parameters: pipe ... Browse Code »

This patch introduces the new syscall pipe2 which is like pipe but it also
takes an additional parameter which takes a flag value. This patch implements
the handling of O_CLOEXEC for the flag. I did not add support for the new
syscall for the architectures which have a special sys_pipe implementation. I
think the maintainers of those archs have the chance to go with the unified
implementation but that's up to them.

The implementation introduces do_pipe_flags. I did that instead of changing
all callers of do_pipe because some of the callers are written in assembler.
I would probably screw up changing the assembly code. To avoid breaking code
do_pipe is now a small wrapper around do_pipe_flags. Once all callers are
changed over to do_pipe_flags the old do_pipe function can be removed.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include
#include
#include
#include

#ifndef __NR_pipe2
# ifdef __x86_64__
# define __NR_pipe2 293
# elif defined __i386__
# define __NR_pipe2 331
# else
# error "need __NR_pipe2"
# endif
#endif

int
main (void)
{
int fd[2];
if (syscall (__NR_pipe2, fd, 0) != 0)
{
puts ("pipe2(0) failed");
return 1;
}
for (int i = 0; i < 2; ++i)
{
int coe = fcntl (fd[i], F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if (coe & FD_CLOEXEC)
{
printf ("pipe2(0) set close-on-exit for fd[%d]\n", i);
return 1;
}
}
close (fd[0]);
close (fd[1]);

if (syscall (__NR_pipe2, fd, O_CLOEXEC) != 0)
{
puts ("pipe2(O_CLOEXEC) failed");
return 1;
}
for (int i = 0; i < 2; ++i)
{
int coe = fcntl (fd[i], F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if ((coe & FD_CLOEXEC) == 0)
{
printf ("pipe2(O_CLOEXEC) does not set close-on-exit for fd[%d]\n", i);
return 1;
}
}
close (fd[0]);
close (fd[1]);

puts ("OK");

return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Ulrich Drepper
Acked-by: Davide Libenzi
Cc: Michael Kerrisk
Cc:
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds

Ulrich Drepper
2008-07-25 01:47:28 +0800
336dd1f70 flag parameters: dup2 ... Browse Code »

This patch adds the new dup3 syscall. It extends the old dup2 syscall by one
parameter which is meant to hold a flag value. Support for the O_CLOEXEC flag
is added in this patch.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include
#include
#include
#include
#include

#ifndef __NR_dup3
# ifdef __x86_64__
# define __NR_dup3 292
# elif defined __i386__
# define __NR_dup3 330
# else
# error "need __NR_dup3"
# endif
#endif

int
main (void)
{
int fd = syscall (__NR_dup3, 1, 4, 0);
if (fd == -1)
{
puts ("dup3(0) failed");
return 1;
}
int coe = fcntl (fd, F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if (coe & FD_CLOEXEC)
{
puts ("dup3(0) set close-on-exec flag");
return 1;
}
close (fd);

fd = syscall (__NR_dup3, 1, 4, O_CLOEXEC);
if (fd == -1)
{
puts ("dup3(O_CLOEXEC) failed");
return 1;
}
coe = fcntl (fd, F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if ((coe & FD_CLOEXEC) == 0)
{
puts ("dup3(O_CLOEXEC) set close-on-exec flag");
return 1;
}
close (fd);

puts ("OK");

return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Ulrich Drepper
Acked-by: Davide Libenzi
Cc: Michael Kerrisk
Cc:
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds

Ulrich Drepper
2008-07-25 01:47:28 +0800
a0998b50c flag parameters: epoll_create ... Browse Code »

This patch adds the new epoll_create2 syscall. It extends the old epoll_create
syscall by one parameter which is meant to hold a flag value. In this
patch the only flag support is EPOLL_CLOEXEC which causes the close-on-exec
flag for the returned file descriptor to be set.

A new name EPOLL_CLOEXEC is introduced which in this implementation must
have the same value as O_CLOEXEC.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include
#include
#include
#include
#include

#ifndef __NR_epoll_create2
# ifdef __x86_64__
# define __NR_epoll_create2 291
# elif defined __i386__
# define __NR_epoll_create2 329
# else
# error "need __NR_epoll_create2"
# endif
#endif

#define EPOLL_CLOEXEC O_CLOEXEC

int
main (void)
{
int fd = syscall (__NR_epoll_create2, 1, 0);
if (fd == -1)
{
puts ("epoll_create2(0) failed");
return 1;
}
int coe = fcntl (fd, F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if (coe & FD_CLOEXEC)
{
puts ("epoll_create2(0) set close-on-exec flag");
return 1;
}
close (fd);

fd = syscall (__NR_epoll_create2, 1, EPOLL_CLOEXEC);
if (fd == -1)
{
puts ("epoll_create2(EPOLL_CLOEXEC) failed");
return 1;
}
coe = fcntl (fd, F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if ((coe & FD_CLOEXEC) == 0)
{
puts ("epoll_create2(EPOLL_CLOEXEC) set close-on-exec flag");
return 1;
}
close (fd);

puts ("OK");

return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Ulrich Drepper
Acked-by: Davide Libenzi
Cc: Michael Kerrisk
Cc:
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds

Ulrich Drepper
2008-07-25 01:47:28 +0800
11fcb6c14 flag parameters: timerfd_create ... Browse Code »

The timerfd_create syscall already has a flags parameter. It just is
unused so far. This patch changes this by introducing the TFD_CLOEXEC
flag to set the close-on-exec flag for the returned file descriptor.

A new name TFD_CLOEXEC is introduced which in this implementation must
have the same value as O_CLOEXEC.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include
#include
#include
#include
#include

#ifndef __NR_timerfd_create
# ifdef __x86_64__
# define __NR_timerfd_create 283
# elif defined __i386__
# define __NR_timerfd_create 322
# else
# error "need __NR_timerfd_create"
# endif
#endif

#define TFD_CLOEXEC O_CLOEXEC

int
main (void)
{
int fd = syscall (__NR_timerfd_create, CLOCK_REALTIME, 0);
if (fd == -1)
{
puts ("timerfd_create(0) failed");
return 1;
}
int coe = fcntl (fd, F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if (coe & FD_CLOEXEC)
{
puts ("timerfd_create(0) set close-on-exec flag");
return 1;
}
close (fd);

fd = syscall (__NR_timerfd_create, CLOCK_REALTIME, TFD_CLOEXEC);
if (fd == -1)
{
puts ("timerfd_create(TFD_CLOEXEC) failed");
return 1;
}
coe = fcntl (fd, F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if ((coe & FD_CLOEXEC) == 0)
{
puts ("timerfd_create(TFD_CLOEXEC) set close-on-exec flag");
return 1;
}
close (fd);

puts ("OK");

return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Ulrich Drepper
Acked-by: Davide Libenzi
Cc: Michael Kerrisk
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds

Ulrich Drepper
2008-07-25 01:47:27 +0800
b087498eb flag parameters: eventfd ... Browse Code »

This patch adds the new eventfd2 syscall. It extends the old eventfd
syscall by one parameter which is meant to hold a flag value. In this
patch the only flag support is EFD_CLOEXEC which causes the close-on-exec
flag for the returned file descriptor to be set.

A new name EFD_CLOEXEC is introduced which in this implementation must
have the same value as O_CLOEXEC.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include
#include
#include
#include

#ifndef __NR_eventfd2
# ifdef __x86_64__
# define __NR_eventfd2 290
# elif defined __i386__
# define __NR_eventfd2 328
# else
# error "need __NR_eventfd2"
# endif
#endif

#define EFD_CLOEXEC O_CLOEXEC

int
main (void)
{
int fd = syscall (__NR_eventfd2, 1, 0);
if (fd == -1)
{
puts ("eventfd2(0) failed");
return 1;
}
int coe = fcntl (fd, F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if (coe & FD_CLOEXEC)
{
puts ("eventfd2(0) sets close-on-exec flag");
return 1;
}
close (fd);

fd = syscall (__NR_eventfd2, 1, EFD_CLOEXEC);
if (fd == -1)
{
puts ("eventfd2(EFD_CLOEXEC) failed");
return 1;
}
coe = fcntl (fd, F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if ((coe & FD_CLOEXEC) == 0)
{
puts ("eventfd2(EFD_CLOEXEC) does not set close-on-exec flag");
return 1;
}
close (fd);

puts ("OK");

return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

[akpm@linux-foundation.org: add sys_ni stub]
Signed-off-by: Ulrich Drepper
Acked-by: Davide Libenzi
Cc: Michael Kerrisk
Cc:
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds

Ulrich Drepper
2008-07-25 01:47:27 +0800
9deb27bae flag parameters: signalfd ... Browse Code »

This patch adds the new signalfd4 syscall. It extends the old signalfd
syscall by one parameter which is meant to hold a flag value. In this
patch the only flag support is SFD_CLOEXEC which causes the close-on-exec
flag for the returned file descriptor to be set.

A new name SFD_CLOEXEC is introduced which in this implementation must
have the same value as O_CLOEXEC.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include
#include
#include
#include
#include

#ifndef __NR_signalfd4
# ifdef __x86_64__
# define __NR_signalfd4 289
# elif defined __i386__
# define __NR_signalfd4 327
# else
# error "need __NR_signalfd4"
# endif
#endif

#define SFD_CLOEXEC O_CLOEXEC

int
main (void)
{
sigset_t ss;
sigemptyset (&ss);
sigaddset (&ss, SIGUSR1);
int fd = syscall (__NR_signalfd4, -1, &ss, 8, 0);
if (fd == -1)
{
puts ("signalfd4(0) failed");
return 1;
}
int coe = fcntl (fd, F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if (coe & FD_CLOEXEC)
{
puts ("signalfd4(0) set close-on-exec flag");
return 1;
}
close (fd);

fd = syscall (__NR_signalfd4, -1, &ss, 8, SFD_CLOEXEC);
if (fd == -1)
{
puts ("signalfd4(SFD_CLOEXEC) failed");
return 1;
}
coe = fcntl (fd, F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if ((coe & FD_CLOEXEC) == 0)
{
puts ("signalfd4(SFD_CLOEXEC) does not set close-on-exec flag");
return 1;
}
close (fd);

puts ("OK");

return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

[akpm@linux-foundation.org: add sys_ni stub]
Signed-off-by: Ulrich Drepper
Acked-by: Davide Libenzi
Cc: Michael Kerrisk
Cc:
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds

Ulrich Drepper
2008-07-25 01:47:27 +0800
7d9dbca34 flag parameters: anon_inode_getfd extension ... Browse Code »

This patch just extends the anon_inode_getfd interface to take an additional
parameter with a flag value. The flag value is passed on to
get_unused_fd_flags in anticipation for a use with the O_CLOEXEC flag.

No actual semantic changes here, the changed callers all pass 0 for now.

[akpm@linux-foundation.org: KVM fix]
Signed-off-by: Ulrich Drepper
Acked-by: Davide Libenzi
Cc: Michael Kerrisk
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds

Ulrich Drepper
2008-07-25 01:47:27 +0800
6e2c10a12 binfmt_misc: use simple_read_from_buffer() ... Browse Code »

Signed-off-by: Akinobu Mita
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds

Akinobu Mita
2008-07-25 01:47:27 +0800
f4a67ccee fs: check for statfs overflow ... Browse Code »

Adds a check for an overflow in the filesystem size so if someone is
checking with statfs() on a 16G blocksize hugetlbfs in a 32bit binary that
it will report back EOVERFLOW instead of a size of 0.

Acked-by: Nishanth Aravamudan
Signed-off-by: Jon Tollefson
Signed-off-by: Nick Piggin
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds

Jon Tollefson
2008-07-25 01:47:19 +0800
a137e1cc6 hugetlbfs: per mount huge page sizes ... Browse Code »

Add the ability to configure the hugetlb hstate used on a per mount basis.

- Add a new pagesize= option to the hugetlbfs mount that allows setting
the page size
- This option causes the mount code to find the hstate corresponding to the
specified size, and sets up a pointer to the hstate in the mount's
superblock.
- Change the hstate accessors to use this information rather than the
global_hstate they were using (requires a slight change in mm/memory.c
so we don't NULL deref in the error-unmap path -- see comments).

[np: take hstate out of hugetlbfs inode and vma->vm_private_data]

Acked-by: Adam Litke
Acked-by: Nishanth Aravamudan
Signed-off-by: Andi Kleen
Signed-off-by: Nick Piggin
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds

Andi Kleen
2008-07-25 01:47:17 +0800
a55164389 hugetlb: modular state for hugetlb page size ... Browse Code »

The goal of this patchset is to support multiple hugetlb page sizes. This
is achieved by introducing a new struct hstate structure, which
encapsulates the important hugetlb state and constants (eg. huge page
size, number of huge pages currently allocated, etc).

The hstate structure is then passed around the code which requires these
fields, they will do the right thing regardless of the exact hstate they
are operating on.

This patch adds the hstate structure, with a single global instance of it
(default_hstate), and does the basic work of converting hugetlb to use the
hstate.

Future patches will add more hstate structures to allow for different
hugetlbfs mounts to have different page sizes.

[akpm@linux-foundation.org: coding-style fixes]
Acked-by: Adam Litke
Acked-by: Nishanth Aravamudan
Signed-off-by: Andi Kleen
Signed-off-by: Nick Piggin
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds

Andi Kleen
2008-07-25 01:47:17 +0800
a47a126ad vmallocinfo: add NUMA information ... Browse Code »

Christoph recently added /proc/vmallocinfo file to get information about
vmalloc allocations.

This patch adds NUMA specific information, giving number of pages
allocated on each memory node.

This should help to check that vmalloc() is able to respect NUMA policies.

Example of output on a four nodes machine (one cpu per node)

1) network hash tables are evenly spreaded on four nodes (OK) (Same
point for inodes and dentries hash tables)

2) iptables tables (x_tables) are correctly allocated on each cpu node
(OK).

3) sys_swapon() allocates its memory from one node only.

4) each loaded module is using memory on one node.

Sysadmins could tune their setup to change points 3) and 4) if necessary.

grep "pages=" /proc/vmallocinfo
0xffffc20000000000-0xffffc20000201000 2101248 alloc_large_system_hash+0x204/0x2c0 pages=512 vmalloc N0=128 N1=128 N2=128 N3=128
0xffffc20000201000-0xffffc20000302000 1052672 alloc_large_system_hash+0x204/0x2c0 pages=256 vmalloc N0=64 N1=64 N2=64 N3=64
0xffffc2000031a000-0xffffc2000031d000 12288 alloc_large_system_hash+0x204/0x2c0 pages=2 vmalloc N1=1 N2=1
0xffffc2000031f000-0xffffc2000032b000 49152 cramfs_uncompress_init+0x2e/0x80 pages=11 vmalloc N0=3 N1=3 N2=2 N3=3
0xffffc2000033e000-0xffffc20000341000 12288 sys_swapon+0x640/0xac0 pages=2 vmalloc N0=2
0xffffc20000341000-0xffffc20000344000 12288 xt_alloc_table_info+0xfe/0x130 [x_tables] pages=2 vmalloc N0=2
0xffffc20000344000-0xffffc20000347000 12288 xt_alloc_table_info+0xfe/0x130 [x_tables] pages=2 vmalloc N1=2
0xffffc20000347000-0xffffc2000034a000 12288 xt_alloc_table_info+0xfe/0x130 [x_tables] pages=2 vmalloc N2=2
0xffffc2000034a000-0xffffc2000034d000 12288 xt_alloc_table_info+0xfe/0x130 [x_tables] pages=2 vmalloc N3=2
0xffffc20004381000-0xffffc20004402000 528384 alloc_large_system_hash+0x204/0x2c0 pages=128 vmalloc N0=32 N1=32 N2=32 N3=32
0xffffc20004402000-0xffffc20004803000 4198400 alloc_large_system_hash+0x204/0x2c0 pages=1024 vmalloc vpages N0=256 N1=256 N2=256 N3=256
0xffffc20004803000-0xffffc20004904000 1052672 alloc_large_system_hash+0x204/0x2c0 pages=256 vmalloc N0=64 N1=64 N2=64 N3=64
0xffffc20004904000-0xffffc20004bec000 3047424 sys_swapon+0x640/0xac0 pages=743 vmalloc vpages N0=743
0xffffffffa0000000-0xffffffffa000f000 61440 sys_init_module+0xc27/0x1d00 pages=14 vmalloc N1=14
0xffffffffa000f000-0xffffffffa0014000 20480 sys_init_module+0xc27/0x1d00 pages=4 vmalloc N0=4
0xffffffffa0014000-0xffffffffa0017000 12288 sys_init_module+0xc27/0x1d00 pages=2 vmalloc N0=2
0xffffffffa0017000-0xffffffffa0022000 45056 sys_init_module+0xc27/0x1d00 pages=10 vmalloc N1=10
0xffffffffa0022000-0xffffffffa0028000 24576 sys_init_module+0xc27/0x1d00 pages=5 vmalloc N3=5
0xffffffffa0028000-0xffffffffa0050000 163840 sys_init_module+0xc27/0x1d00 pages=39 vmalloc N1=39
0xffffffffa0050000-0xffffffffa0052000 8192 sys_init_module+0xc27/0x1d00 pages=1 vmalloc N1=1
0xffffffffa0052000-0xffffffffa0056000 16384 sys_init_module+0xc27/0x1d00 pages=3 vmalloc N1=3
0xffffffffa0056000-0xffffffffa0081000 176128 sys_init_module+0xc27/0x1d00 pages=42 vmalloc N3=42
0xffffffffa0081000-0xffffffffa00ae000 184320 sys_init_module+0xc27/0x1d00 pages=44 vmalloc N3=44
0xffffffffa00ae000-0xffffffffa00b1000 12288 sys_init_module+0xc27/0x1d00 pages=2 vmalloc N3=2
0xffffffffa00b1000-0xffffffffa00b9000 32768 sys_init_module+0xc27/0x1d00 pages=7 vmalloc N0=7
0xffffffffa00b9000-0xffffffffa00c4000 45056 sys_init_module+0xc27/0x1d00 pages=10 vmalloc N3=10
0xffffffffa00c6000-0xffffffffa00e0000 106496 sys_init_module+0xc27/0x1d00 pages=25 vmalloc N2=25
0xffffffffa00e0000-0xffffffffa00f1000 69632 sys_init_module+0xc27/0x1d00 pages=16 vmalloc N2=16
0xffffffffa00f1000-0xffffffffa00f4000 12288 sys_init_module+0xc27/0x1d00 pages=2 vmalloc N3=2
0xffffffffa00f4000-0xffffffffa00f7000 12288 sys_init_module+0xc27/0x1d00 pages=2 vmalloc N3=2

[akpm@linux-foundation.org: fix comment]
Signed-off-by: Eric Dumazet
Cc: Christoph Lameter
Cc: Randy Dunlap
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds

Eric Dumazet
2008-07-25 01:47:17 +0800
cce770815 SYNC_FILE_RANGE_WRITE may and will block. Document that. ... Browse Code »

[akpm@linux-foundation.org: fix comment text]
Signed-off-by: Pavel Machek
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds

Pavel Machek
2008-07-25 01:47:17 +0800
04f2cbe35 hugetlb: guarantee that COW faults for a process that called mmap(MAP_PRIVATE) o… ... Browse Code »

…n hugetlbfs will succeed

After patch 2 in this series, a process that successfully calls mmap() for
a MAP_PRIVATE mapping will be guaranteed to successfully fault until a
process calls fork(). At that point, the next write fault from the parent
could fail due to COW if the child still has a reference.

We only reserve pages for the parent but a copy must be made to avoid
leaking data from the parent to the child after fork(). Reserves could be
taken for both parent and child at fork time to guarantee faults but if
the mapping is large it is highly likely we will not have sufficient pages
for the reservation, and it is common to fork only to exec() immediatly
after. A failure here would be very undesirable.

Note that the current behaviour of mainline with MAP_PRIVATE pages is
pretty bad. The following situation is allowed to occur today.

1. Process calls mmap(MAP_PRIVATE)
2. Process calls mlock() to fault all pages and makes sure it succeeds
3. Process forks()
4. Process writes to MAP_PRIVATE mapping while child still exists
5. If the COW fails at this point, the process gets SIGKILLed even though it
had taken care to ensure the pages existed

This patch improves the situation by guaranteeing the reliability of the
process that successfully calls mmap(). When the parent performs COW, it
will try to satisfy the allocation without using reserves. If that fails
the parent will steal the page leaving any children without a page.
Faults from the child after that point will result in failure. If the
child COW happens first, an attempt will be made to allocate the page
without reserves and the child will get SIGKILLed on failure.

To summarise the new behaviour:

1. If the original mapper performs COW on a private mapping with multiple
references, it will attempt to allocate a hugepage from the pool or
the buddy allocator without using the existing reserves. On fail, VMAs
mapping the same area are traversed and the page being COW'd is unmapped
where found. It will then steal the original page as the last mapper in
the normal way.

2. The VMAs the pages were unmapped from are flagged to note that pages
with data no longer exist. Future no-page faults on those VMAs will
terminate the process as otherwise it would appear that data was corrupted.
A warning is printed to the console that this situation occured.

2. If the child performs COW first, it will attempt to satisfy the COW
from the pool if there are enough pages or via the buddy allocator if
overcommit is allowed and the buddy allocator can satisfy the request. If
it fails, the child will be killed.

If the pool is large enough, existing applications will not notice that
the reserves were a factor. Existing applications depending on the
no-reserves been set are unlikely to exist as for much of the history of
hugetlbfs, pages were prefaulted at mmap(), allocating the pages at that
point or failing the mmap().

[npiggin@suse.de: fix CONFIG_HUGETLB=n build]
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Adam Litke <agl@us.ibm.com>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: William Lee Irwin III <wli@holomorphy.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Mel Gorman
2008-07-25 01:47:16 +0800
a1e78772d hugetlb: reserve huge pages for reliable MAP_PRIVATE hugetlbfs mappings until fork() ... Browse Code »

This patch reserves huge pages at mmap() time for MAP_PRIVATE mappings in
a similar manner to the reservations taken for MAP_SHARED mappings. The
reserve count is accounted both globally and on a per-VMA basis for
private mappings. This guarantees that a process that successfully calls
mmap() will successfully fault all pages in the future unless fork() is
called.

The characteristics of private mappings of hugetlbfs files behaviour after
this patch are;

1. The process calling mmap() is guaranteed to succeed all future faults until
it forks().
2. On fork(), the parent may die due to SIGKILL on writes to the private
mapping if enough pages are not available for the COW. For reasonably
reliable behaviour in the face of a small huge page pool, children of
hugepage-aware processes should not reference the mappings; such as
might occur when fork()ing to exec().
3. On fork(), the child VMAs inherit no reserves. Reads on pages already
faulted by the parent will succeed. Successful writes will depend on enough
huge pages being free in the pool.
4. Quotas of the hugetlbfs mount are checked at reserve time for the mapper
and at fault time otherwise.

Before this patch, all reads or writes in the child potentially needs page
allocations that can later lead to the death of the parent. This applies
to reads and writes of uninstantiated pages as well as COW. After the
patch it is only a write to an instantiated page that causes problems.

Signed-off-by: Mel Gorman
Acked-by: Adam Litke
Cc: Andy Whitcroft
Cc: William Lee Irwin III
Cc: Hugh Dickins
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds

Mel Gorman
2008-07-25 01:47:16 +0800
da3bbdd46 fix soft lock up at NFS mount via per-SB LRU-list of unused dentries ... Browse Code »

[Summary]

Split LRU-list of unused dentries to one per superblock to avoid soft
lock up during NFS mounts and remounting of any filesystem.

Previously I posted here:
http://lkml.org/lkml/2008/3/5/590

[Descriptions]

- background

dentry_unused is a list of dentries which are not referenced.
dentry_unused grows up when references on directories or files are
released. This list can be very long if there is huge free memory.

- the problem

When shrink_dcache_sb() is called, it scans all dentry_unused linearly
under spin_lock(), and if dentry->d_sb is differnt from given
superblock, scan next dentry. This scan costs very much if there are
many entries, and very ineffective if there are many superblocks.

IOW, When we need to shrink unused dentries on one dentry, but scans
unused dentries on all superblocks in the system. For example, we scan
500 dentries to unmount a filesystem, but scans 1,000,000 or more unused
dentries on other superblocks.

In our case , At mounting NFS*, shrink_dcache_sb() is called to shrink
unused dentries on NFS, but scans 100,000,000 unused dentries on
superblocks in the system such as local ext3 filesystems. I hear NFS
mounting took 1 min on some system in use.

* : NFS uses virtual filesystem in rpc layer, so NFS is affected by
this problem.

100,000,000 is possible number on large systems.

Per-superblock LRU of unused dentried can reduce the cost in
reasonable manner.

- How to fix

I found this problem is solved by David Chinner's "Per-superblock
unused dentry LRU lists V3"(1), so I rebase it and add some fix to
reclaim with fairness, which is in Andrew Morton's comments(2).

1) http://lkml.org/lkml/2006/5/25/318
2) http://lkml.org/lkml/2006/5/25/320

Split LRU-list of unused dentries to each superblocks. Then, NFS
mounting will check dentries under a superblock instead of all. But
this spliting will break LRU of dentry-unused. So, I've attempted to
make reclaim unused dentrins with fairness by calculate number of
dentries to scan on this sb based on following way

number of dentries to scan on this sb =
count * (number of dentries on this sb / number of dentries in the machine)

- ToDo
- I have to measuring performance number and do stress tests.

- When unmount occurs during prune_dcache(), scanning on same
superblock, It is unable to reach next superblock because it is gone
away. We restart scannig superblock from first one, it causes
unfairness of reclaim unused dentries on first superblock. But I think
this happens very rarely.

- Test Results

Result on 6GB boxes with excessive unused dentries.

Without patch:

$ cat /proc/sys/fs/dentry-state
10181835 10180203 45 0 0 0
# mount -t nfs 10.124.60.70:/work/kernel-src nfs
real 0m1.830s
user 0m0.001s
sys 0m1.653s

With this patch:
$ cat /proc/sys/fs/dentry-state
10236610 10234751 45 0 0 0
# mount -t nfs 10.124.60.70:/work/kernel-src nfs
real 0m0.106s
user 0m0.002s
sys 0m0.032s

[akpm@linux-foundation.org: fix comments]
Signed-off-by: Kentaro Makita
Cc: Neil Brown
Cc: Trond Myklebust
Cc: David Chinner
Cc: "J. Bruce Fields"
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds

Kentaro Makita
2008-07-25 01:47:15 +0800
42b777281 mm: remove double indirection on tlb parameter to free_pgd_range() & Co ... Browse Code »

The double indirection here is not needed anywhere and hence (at least)
confusing.

Signed-off-by: Jan Beulich
Cc: Hugh Dickins
Cc: Nick Piggin
Cc: Christoph Lameter
Cc: Benjamin Herrenschmidt
Cc: Paul Mackerras
Cc: "Luck, Tony"
Cc: Paul Mundt
Cc: "David S. Miller"
Acked-by: Jeremy Fitzhardinge
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds

Jan Beulich
2008-07-25 01:47:15 +0800
c748e1340 mm/vmstat.c: proper externs ... Browse Code »

This patch adds proper extern declarations for five variables in
include/linux/vmstat.h

Signed-off-by: Adrian Bunk
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds

Adrian Bunk
2008-07-25 01:47:14 +0800

23 Jul, 2008

5 commits

c010b2f76 Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6 ... Browse Code »

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6: (82 commits)
ipw2200: Call netif_*_queue() interfaces properly.
netxen: Needs to include linux/vmalloc.h
[netdrvr] atl1d: fix !CONFIG_PM build
r6040: rework init_one error handling
r6040: bump release number to 0.18
r6040: handle RX fifo full and no descriptor interrupts
r6040: change the default waiting time
r6040: use definitions for magic values in descriptor status
r6040: completely rework the RX path
r6040: call napi_disable when puting down the interface and set lp->dev accordingly.
mv643xx_eth: fix NETPOLL build
r6040: rework the RX buffers allocation routine
r6040: fix scheduling while atomic in r6040_tx_timeout
r6040: fix null pointer access and tx timeouts
r6040: prefix all functions with r6040
rndis_host: support WM6 devices as modems
at91_ether: use netstats in net_device structure
sfc: Create one RX queue and interrupt per CPU package by default
sfc: Use a separate workqueue for resets
sfc: I2C adapter initialisation fixes
...

Linus Torvalds
2008-07-23 10:09:51 +0800
8086cd451 netns: make get_proc_net() static ... Browse Code »

get_proc_net() can now become static.

Signed-off-by: Adrian Bunk
Acked-by: Pavel Emelyanov
Signed-off-by: David S. Miller

Adrian Bunk
2008-07-23 05:19:19 +0800
53baaaa96 Merge git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core-2.6 ... Browse Code »

* git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core-2.6: (79 commits)
arm: bus_id -> dev_name() and dev_set_name() conversions
sparc64: fix up bus_id changes in sparc core code
3c59x: handle pci_name() being const
MTD: handle pci_name() being const
HP iLO driver
sysdev: Convert the x86 mce tolerant sysdev attribute to generic attribute
sysdev: Add utility functions for simple int/ulong variable sysdev attributes
sysdev: Pass the attribute to the low level sysdev show/store function
driver core: Suppress sysfs warnings for device_rename().
kobject: Transmit return value of call_usermodehelper() to caller
sysfs-rules.txt: reword API stability statement
debugfs: Implement debugfs_remove_recursive()
HOWTO: change email addresses of James in HOWTO
always enable FW_LOADER unless EMBEDDED=y
uio-howto.tmpl: use unique output names
uio-howto.tmpl: use standard copyright/legal markings
sysfs: don't call notify_change
sysdev: fix debugging statements in registration code.
kobject: should use kobject_put() in kset-example
kobject: reorder kobject to save space on 64 bit builds
...

Linus Torvalds
2008-07-23 04:13:47 +0800
ee1e6ab60 proc: fix /proc/*/pagemap some more ... Browse Code »

struct pagemap_walk was placed on stack, some hooks are initialized, the
rest (->pgd_entry, ->pud_entry, ->pte_entry) are valid but junk.

Reported-by: Eric Sesterhenn
Signed-off-by: Alexey Dobriyan
Cc: "Vegard Nossum"
Cc: [2.6.25.x, 2.6.26.x]
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds

Alexey Dobriyan
2008-07-23 00:59:41 +0800
651910874 execve filename: document and export via auxiliary vector ... Browse Code »

The Linux kernel puts the filename argument of execve() into the new
address space. Many developers are surprised to learn this. Those who
know and could use it, object "But it's not documented."

Those who want to use it dislike the expression
(char *)(1+ strlen(env[-1+ n_env]) + env[-1+ n_env])
because it requires locating the last original environment variable,
and assumes that the filename follows the characters.

This patch documents the insertion of the filename, and makes it easier
to find by adding a new tag AT_EXECFN in the ElfXX_auxv_t; see .

In many cases readlink("/proc/self/exe",) gives the same answer. But if
all the original pages get unmapped, then the kernel erases the symlink
for /proc/self/exe. This can happen when a program decompressor does a
good job of cleaning up after uncompressing directly to memory, so that
the address space of the target program looks the same as if compression
had never happened. One example is http://upx.sourceforge.net .

One notable use of the underlying concept (what path containED the
executable) is glibc expanding $ORIGIN in DT_RUNPATH. In practice for
the near term, it may be a good idea for user-mode code to use both
/proc/self/exe and AT_EXECFN as fall-back methods for each other.
/proc/self/exe can fail due to unmapping, AT_EXECFN can fail because it
won't be present on non-new systems. The auxvec or {AT_EXECFN}.d_val
also can get overwritten, although in nearly all cases this would be the
result of a bug.

The runtime cost is one NEW_AUX_ENT using two words of stack space. The
underlying value is maintained already as bprm->exec; setup_arg_pages()
in fs/exec.c slides it for stack_shift, etc.

Signed-off-by: John Reiser
Cc: Roland McGrath
Cc: Jakub Jelinek
Cc: Ulrich Drepper
Cc: Benjamin Herrenschmidt
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds

John Reiser
2008-07-23 00:59:40 +0800

22 Jul, 2008

5 commits

36ce6dad6 driver core: Suppress sysfs warnings for device_rename(). ... Browse Code »

driver core: Suppress sysfs warnings for device_rename().

Renaming network devices to an already existing name is not
something we want sysfs to print a scary warning for, since the
callers can deal with this correctly. So let's introduce
sysfs_create_link_nowarn() which gets rid of the common warning.

Signed-off-by: Cornelia Huck
Signed-off-by: Greg Kroah-Hartman

Cornelia Huck
2008-07-22 12:55:01 +0800
9505e6375 debugfs: Implement debugfs_remove_recursive() ... Browse Code »

debugfs_remove_recursive() will remove a dentry and all its children.
Drivers can use this to zap their whole debugfs tree so that they don't
need to keep track of every single debugfs dentry they created.

It may fail to remove the whole tree in certain cases:

sh-3.2# rmmod atmel-mci < /sys/kernel/debug/mmc0/ios/clock
mmc0: card b368 removed
atmel_mci atmel_mci.0: Lost dma0chan1, falling back to PIO
sh-3.2# ls /sys/kernel/debug/mmc0/
ios

But I'm not sure if that case can be handled in any sane manner.

Signed-off-by: Haavard Skinnemoen
Cc: Pierre Ossman
Signed-off-by: Greg Kroah-Hartman

Haavard Skinnemoen
2008-07-22 12:54:59 +0800
93265d13e sysfs: don't call notify_change ... Browse Code »

sysfs_chmod_file() calls notify_change() to change the permission bits
on a sysfs file. Replace with explicit call to sysfs_setattr() and
fsnotify_change().

This is equivalent, except that security_inode_setattr() is not
called. This function is called by drivers, so the security checks do
not make any sense.

Signed-off-by: Miklos Szeredi
Signed-off-by: Greg Kroah-Hartman

Miklos Szeredi
2008-07-22 12:54:57 +0800
aab0de245 driver core: remove KOBJ_NAME_LEN define ... Browse Code »

Kobjects do not have a limit in name size since a while, so stop
pretending that they do.

Signed-off-by: Kay Sievers
Signed-off-by: Greg Kroah-Hartman

Kay Sievers
2008-07-22 12:54:52 +0800
6143b5997 device create: coda: convert device_create to device_create_drvdata ... Browse Code »

device_create() is race-prone, so use the race-free
device_create_drvdata() instead as device_create() is going away.

Cc: Jan Harkes
Signed-off-by: Greg Kroah-Hartman

Greg Kroah-Hartman
2008-07-22 12:54:41 +0800

21 Jul, 2008

3 commits

14b395e35 Merge branch 'for-2.6.27' of git://linux-nfs.org/~bfields/linux ... Browse Code »

* 'for-2.6.27' of git://linux-nfs.org/~bfields/linux: (51 commits)
nfsd: nfs4xdr.c do-while is not a compound statement
nfsd: Use C99 initializers in fs/nfsd/nfs4xdr.c
lockd: Pass "struct sockaddr *" to new failover-by-IP function
lockd: get host reference in nlmsvc_create_block() instead of callers
lockd: minor svclock.c style fixes
lockd: eliminate duplicate nlmsvc_lookup_host call from nlmsvc_lock
lockd: eliminate duplicate nlmsvc_lookup_host call from nlmsvc_testlock
lockd: nlm_release_host() checks for NULL, caller needn't
file lock: reorder struct file_lock to save space on 64 bit builds
nfsd: take file and mnt write in nfs4_upgrade_open
nfsd: document open share bit tracking
nfsd: tabulate nfs4 xdr encoding functions
nfsd: dprint operation names
svcrdma: Change WR context get/put to use the kmem cache
svcrdma: Create a kmem cache for the WR contexts
svcrdma: Add flush_scheduled_work to module exit function
svcrdma: Limit ORD based on client's advertised IRD
svcrdma: Remove unused wait q from svcrdma_xprt structure
svcrdma: Remove unneeded spin locks from __svc_rdma_free
svcrdma: Add dma map count and WARN_ON
...

Linus Torvalds
2008-07-21 12:21:46 +0800
db6d8c7a4 Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6 ... Browse Code »

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6: (1232 commits)
iucv: Fix bad merging.
net_sched: Add size table for qdiscs
net_sched: Add accessor function for packet length for qdiscs
net_sched: Add qdisc_enqueue wrapper
highmem: Export totalhigh_pages.
ipv6 mcast: Omit redundant address family checks in ip6_mc_source().
net: Use standard structures for generic socket address structures.
ipv6 netns: Make several "global" sysctl variables namespace aware.
netns: Use net_eq() to compare net-namespaces for optimization.
ipv6: remove unused macros from net/ipv6.h
ipv6: remove unused parameter from ip6_ra_control
tcp: fix kernel panic with listening_get_next
tcp: Remove redundant checks when setting eff_sacks
tcp: options clean up
tcp: Fix MD5 signatures for non-linear skbs
sctp: Update sctp global memory limit allocations.
sctp: remove unnecessary byteshifting, calculate directly in big-endian
sctp: Allow only 1 listening socket with SO_REUSEADDR
sctp: Do not leak memory on multiple listen() calls
sctp: Support ipv6only AF_INET6 sockets.
...

Linus Torvalds
2008-07-21 08:43:29 +0800
f7df406dc Merge branch 'configfs-fixup-ptr-error' of git://oss.oracle.com/git/jlbec/linux-2.6 ... Browse Code »

* 'configfs-fixup-ptr-error' of git://oss.oracle.com/git/jlbec/linux-2.6:
configfs: Allow ->make_item() and ->make_group() to return detailed errors.
Revert "configfs: Allow ->make_item() and ->make_group() to return detailed errors."

Linus Torvalds
2008-07-21 08:17:52 +0800