10 Oct, 2012

1 commit

  • This is the revised patch for fixing rds-ping spinlock recursion
    according to Venkat's suggestions.

    The RDS ping/pong over TCP feature has been broken for years (2.6.39 to
    3.6.0) since we have to set TCP cork and call kernel_sendmsg() between
    ping/pong which both need to lock "struct sock *sk". However, this
    lock has already been held before the rds_tcp_data_ready() callback is
    triggered. As a result, we always face spinlock recursion, which
    results in a system panic.

    Given that RDS ping is only used to test the connectivity and not for
    serious performance measurements, we can queue the pong transmit to
    rds_wq as a delayed response.

    Reported-by: Dan Carpenter
    CC: Venkat Venkatsubra
    CC: David S. Miller
    CC: James Morris
    Signed-off-by: Jie Liu
    Signed-off-by: David S. Miller

    jeff.liu
     

23 Aug, 2012

1 commit


23 Jul, 2012

1 commit

  • Jay Fenlason (fenlason@redhat.com) found a bug,
    that recvfrom() on an RDS socket can return the contents of random kernel
    memory to userspace if it was called with an address length larger than
    sizeof(struct sockaddr_in).
    rds_recvmsg() also fails to set the addr_len parameter properly before
    returning, but that's just a bug.
    There are also a number of cases where recvfrom() can return an entirely bogus
    address. Anything in rds_recvmsg() that returns a non-negative value but does
    not go through the "sin = (struct sockaddr_in *)msg->msg_name;" code path
    at the end of the while(1) loop will return up to 128 bytes of kernel memory
    to userspace.

    I wrote two test programs to reproduce this bug; you will see that in
    rds_server, fromAddr will be overwritten and the following sock_fd will be
    destroyed.
    Yes, it is the programmer's fault to set msg_namelen incorrectly, but it is
    better to make the kernel copy the real length of address to user space in
    such case.

    How to run the test programs ?
    I test them on 32bit x86 system, 3.5.0-rc7.

    1 compile
    gcc -o rds_client rds_client.c
    gcc -o rds_server rds_server.c

    2 run ./rds_server on one console

    3 run ./rds_client on another console

    4 you will see something like:
    server is waiting to receive data...
    old socket fd=3
    server received data from client:data from client
    msg.msg_namelen=32
    new socket fd=-1067277685
    sendmsg()
    : Bad file descriptor

    /***************** rds_client.c ********************/

    /*
     * rds_client: test client for the reproducer.
     *
     * Binds an AF_RDS socket to 127.0.0.1:4001, sends the string
     * "data from client" to 127.0.0.1:4000, then waits for a single reply
     * and prints it. Returns 0 on success; every failure path prints a
     * message with perror() and exits with status 1.
     *
     * NOTE: no #include lines are shown in this listing; they must be
     * supplied before the program can be compiled.
     */
    int main(void)
    {
    int sock_fd;
    struct sockaddr_in serverAddr;
    struct sockaddr_in toAddr;
    /* One buffer is used for both the outgoing payload and the reply. */
    char recvBuffer[128] = "data from client";
    struct msghdr msg;
    struct iovec iov;

    sock_fd = socket(AF_RDS, SOCK_SEQPACKET, 0);
    if (sock_fd < 0) {
    perror("create socket error\n");
    exit(1);
    }

    /* Despite its name, serverAddr is this client's own local address. */
    memset(&serverAddr, 0, sizeof(serverAddr));
    serverAddr.sin_family = AF_INET;
    serverAddr.sin_addr.s_addr = inet_addr("127.0.0.1");
    serverAddr.sin_port = htons(4001);

    if (bind(sock_fd, (struct sockaddr*)&serverAddr, sizeof(serverAddr)) < 0) {
    perror("bind() error\n");
    close(sock_fd);
    exit(1);
    }

    /* Destination: port 4000, the port rds_server binds to. */
    memset(&toAddr, 0, sizeof(toAddr));
    toAddr.sin_family = AF_INET;
    toAddr.sin_addr.s_addr = inet_addr("127.0.0.1");
    toAddr.sin_port = htons(4000);
    /* msg is never zeroed; each field is assigned individually instead. */
    msg.msg_name = &toAddr;
    msg.msg_namelen = sizeof(toAddr);
    msg.msg_iov = &iov;
    msg.msg_iovlen = 1;
    /* iov itself is filled in through the msg.msg_iov pointer. */
    msg.msg_iov->iov_base = recvBuffer;
    /* +1 so the terminating NUL is sent along with the text. */
    msg.msg_iov->iov_len = strlen(recvBuffer) + 1;
    msg.msg_control = 0;
    msg.msg_controllen = 0;
    msg.msg_flags = 0;

    if (sendmsg(sock_fd, &msg, 0) == -1) {
    perror("sendto() error\n");
    close(sock_fd);
    exit(1);
    }

    printf("client send data:%s\n", recvBuffer);

    /* Clear the buffer so the reply is printed without leftover payload. */
    memset(recvBuffer, '\0', 128);

    /*
     * Re-arm every msg field for the receive. msg_namelen is exactly
     * sizeof(toAddr) here, unlike the oversized value used in rds_server.
     */
    msg.msg_name = &toAddr;
    msg.msg_namelen = sizeof(toAddr);
    msg.msg_iov = &iov;
    msg.msg_iovlen = 1;
    msg.msg_iov->iov_base = recvBuffer;
    msg.msg_iov->iov_len = 128;
    msg.msg_control = 0;
    msg.msg_controllen = 0;
    msg.msg_flags = 0;
    if (recvmsg(sock_fd, &msg, 0) == -1) {
    perror("recvmsg() error\n");
    close(sock_fd);
    exit(1);
    }

    printf("receive data from server:%s\n", recvBuffer);

    close(sock_fd);

    return 0;
    }

    /***************** rds_server.c ********************/

    /*
     * rds_server: test server that demonstrates the oversized-msg_namelen bug.
     *
     * Binds an AF_RDS socket to 127.0.0.1:4000, then loops forever: receive
     * a message, print it together with msg.msg_namelen and sock_fd, append
     * "--data from server" and send it back using the same msghdr.
     *
     * The loop never terminates normally; the program leaves only through
     * the exit() calls on error.
     *
     * NOTE: no #include lines are shown in this listing; they must be
     * supplied before the program can be compiled.
     */
    int main(void)
    {
    /*
     * fromAddr is declared immediately before sock_fd on purpose: the
     * demonstration relies on the extra bytes written past fromAddr landing
     * on sock_fd. NOTE(review): this depends on the compiler's stack layout.
     */
    struct sockaddr_in fromAddr;
    int sock_fd;
    struct sockaddr_in serverAddr;
    /* addrLen is declared but never used in this function. */
    unsigned int addrLen;
    char recvBuffer[128];
    struct msghdr msg;
    struct iovec iov;

    sock_fd = socket(AF_RDS, SOCK_SEQPACKET, 0);
    if(sock_fd < 0) {
    perror("create socket error\n");
    /* Exits with status 0 here, whereas the other error paths use 1. */
    exit(0);
    }

    memset(&serverAddr, 0, sizeof(serverAddr));
    serverAddr.sin_family = AF_INET;
    serverAddr.sin_addr.s_addr = inet_addr("127.0.0.1");
    serverAddr.sin_port = htons(4000);
    if (bind(sock_fd, (struct sockaddr*)&serverAddr, sizeof(serverAddr)) < 0) {
    perror("bind error\n");
    close(sock_fd);
    exit(1);
    }

    printf("server is waiting to receive data...\n");
    msg.msg_name = &fromAddr;

    /*
    * I add 16 to sizeof(fromAddr), ie 32,
    * and pay attention to the definition of fromAddr,
    * recvmsg() will overwrite sock_fd,
    * since kernel will copy 32 bytes to userspace.
    *
    * If you just use sizeof(fromAddr), it works fine.
    * */
    msg.msg_namelen = sizeof(fromAddr) + 16;
    /* msg.msg_namelen = sizeof(fromAddr); */
    msg.msg_iov = &iov;
    msg.msg_iovlen = 1;
    /* iov itself is filled in through the msg.msg_iov pointer. */
    msg.msg_iov->iov_base = recvBuffer;
    msg.msg_iov->iov_len = 128;
    msg.msg_control = 0;
    msg.msg_controllen = 0;
    msg.msg_flags = 0;

    /* msg is set up once above and is not re-initialised between iterations. */
    while (1) {
    /* Print sock_fd before and after recvmsg() to make the overwrite visible. */
    printf("old socket fd=%d\n", sock_fd);
    if (recvmsg(sock_fd, &msg, 0) == -1) {
    perror("recvmsg() error\n");
    close(sock_fd);
    exit(1);
    }
    printf("server received data from client:%s\n", recvBuffer);
    printf("msg.msg_namelen=%d\n", msg.msg_namelen);
    printf("new socket fd=%d\n", sock_fd);
    /*
     * Appends 18 characters plus a NUL to the 128-byte buffer; this assumes
     * the received text is short enough to leave that much room, as the
     * rds_client payload is.
     */
    strcat(recvBuffer, "--data from server");
    /*
     * Reply with the same msghdr: msg_name still points at fromAddr and
     * iov_len is still 128, so the whole buffer is sent back.
     */
    if (sendmsg(sock_fd, &msg, 0) == -1) {
    perror("sendmsg()\n");
    close(sock_fd);
    exit(1);
    }
    }

    /* Not reached: the loop above has no break. */
    close(sock_fd);
    return 0;
    }

    Signed-off-by: Weiping Pan
    Signed-off-by: David S. Miller

    Weiping Pan
     

11 Jul, 2012

1 commit


30 May, 2012

1 commit

  • RDS code assumes that the struct ib_device dma_device member, which is a
    pointer, points to a struct device embedded in a struct pci_dev.

    This is not the case for ehca, for example, which is an OF driver, and
    makes dma_device point to a struct device embedded in a struct
    platform_device.

    This will make the system crash when rds_rdma is loaded in a system
    with ehca, since it will try to access the bus member of a non-existent
    struct pci_dev.

    The only reason rds_rdma uses the struct pci_dev is to get the NUMA node
    the device is attached to. Using dev_to_node for that is much better,
    since it won't assume which bus the infiniband is attached to.

    Signed-off-by: Thadeu Lima de Souza Cascardo
    Cc: dledford@redhat.com
    Cc: Jes.Sorensen@redhat.com
    Cc: Venkat Venkatsubra
    Acked-by: Venkat Venkatsubra
    Signed-off-by: David S. Miller

    Thadeu Lima de Souza Cascardo
     

22 Apr, 2012

1 commit


21 Apr, 2012

2 commits

  • This results in code with less boiler plate that is a bit easier
    to read.

    Additionally stops us from using compatibility code in the sysctl
    core, hastening the day when the compatibility code can be removed.

    Signed-off-by: Eric W. Biederman
    Acked-by: Pavel Emelyanov
    Signed-off-by: David S. Miller

    Eric W. Biederman
     
  • This makes it clearer which sysctls are relative to your current network
    namespace.

    This makes it a little less error prone by not exposing sysctls for the
    initial network namespace in other namespaces.

    This is the same way we handle all of our other network interfaces to
    userspace and I can't honestly remember why we didn't do this for
    sysctls right from the start.

    Signed-off-by: Eric W. Biederman
    Acked-by: Pavel Emelyanov
    Signed-off-by: David S. Miller

    Eric W. Biederman
     

23 Mar, 2012

1 commit

  • We should be using the gfp flags the caller specified here, instead of
    GFP_KERNEL. I think this might be a bugfix, depending on the value of
    "sock->sk->sk_allocation" when we call rds_conn_create_outgoing() in
    rds_sendmsg(). Otherwise, it's just a cleanup.

    Signed-off-by: Dan Carpenter
    Acked-by: Venkat Venkatsubra
    Signed-off-by: David S. Miller

    Dan Carpenter
     

22 Mar, 2012

1 commit

  • Pull kmap_atomic cleanup from Cong Wang.

    It's been in -next for a long time, and it gets rid of the (no longer
    used) second argument to k[un]map_atomic().

    Fix up a few trivial conflicts in various drivers, and do an "evil
    merge" to catch some new uses that have come in since Cong's tree.

    * 'kmap_atomic' of git://github.com/congwang/linux: (59 commits)
    feature-removal-schedule.txt: schedule the deprecated form of kmap_atomic() for removal
    highmem: kill all __kmap_atomic() [swarren@nvidia.com: highmem: Fix ARM build break due to __kmap_atomic rename]
    drbd: remove the second argument of k[un]map_atomic()
    zcache: remove the second argument of k[un]map_atomic()
    gma500: remove the second argument of k[un]map_atomic()
    dm: remove the second argument of k[un]map_atomic()
    tomoyo: remove the second argument of k[un]map_atomic()
    sunrpc: remove the second argument of k[un]map_atomic()
    rds: remove the second argument of k[un]map_atomic()
    net: remove the second argument of k[un]map_atomic()
    mm: remove the second argument of k[un]map_atomic()
    lib: remove the second argument of k[un]map_atomic()
    power: remove the second argument of k[un]map_atomic()
    kdb: remove the second argument of k[un]map_atomic()
    udf: remove the second argument of k[un]map_atomic()
    ubifs: remove the second argument of k[un]map_atomic()
    squashfs: remove the second argument of k[un]map_atomic()
    reiserfs: remove the second argument of k[un]map_atomic()
    ocfs2: remove the second argument of k[un]map_atomic()
    ntfs: remove the second argument of k[un]map_atomic()
    ...

    Linus Torvalds
     

21 Mar, 2012

2 commits

  • Pull trivial tree from Jiri Kosina:
    "It's indeed trivial -- mostly documentation updates and a bunch of
    typo fixes from Masanari.

    There are also several linux/version.h include removals from Jesper."

    * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jikos/trivial: (101 commits)
    kcore: fix spelling in read_kcore() comment
    constify struct pci_dev * in obvious cases
    Revert "char: Fix typo in viotape.c"
    init: fix wording error in mm_init comment
    usb: gadget: Kconfig: fix typo for 'different'
    Revert "power, max8998: Include linux/module.h just once in drivers/power/max8998_charger.c"
    writeback: fix fn name in writeback_inodes_sb_nr_if_idle() comment header
    writeback: fix typo in the writeback_control comment
    Documentation: Fix multiple typo in Documentation
    tpm_tis: fix tis_lock with respect to RCU
    Revert "media: Fix typo in mixer_drv.c and hdmi_drv.c"
    Doc: Update numastat.txt
    qla4xxx: Add missing spaces to error messages
    compiler.h: Fix typo
    security: struct security_operations kerneldoc fix
    Documentation: broken URL in libata.tmpl
    Documentation: broken URL in filesystems.tmpl
    mtd: simplify return logic in do_map_probe()
    mm: fix comment typo of truncate_inode_pages_range
    power: bq27x00: Fix typos in comment
    ...

    Linus Torvalds
     
  • no socket layer outputs a message for this error and neither should rds.

    Signed-off-by: Dave Jones
    Signed-off-by: David S. Miller

    Dave Jones
     

20 Mar, 2012

1 commit


10 Feb, 2012

1 commit


25 Jan, 2012

1 commit

  • rds_sock_info() triggers locking warnings because we try to perform a
    local_bh_enable() (via sock_i_ino()) while hardware interrupts are
    disabled (via taking rds_sock_lock).

    There is no reason for rds_sock_lock to be a hardware IRQ disabling
    lock, none of these access paths run in hardware interrupt context.

    Therefore making it a BH disabling lock is safe and sufficient to
    fix this bug.

    Reported-by: Kumar Sanghvi
    Reported-by: Josh Boyer
    Signed-off-by: David S. Miller

    David S. Miller
     

13 Jan, 2012

1 commit


14 Nov, 2011

1 commit

  • Commit 1bc144b625 ("net, rds, Replace xlist in net/rds/xlist.h with
    llist") added "select LLIST" to the RDS_RDMA Kconfig entry. But there is
    no Kconfig symbol named LLIST. The select statement for that symbol is a
    nop. Drop it.

    lib/llist.o is builtin, so all that's needed to use the llist
    functionality is to include linux/llist.h, which this commit also did.

    Signed-off-by: Paul Bolle
    Signed-off-by: David S. Miller

    Paul Bolle
     

07 Nov, 2011

1 commit

  • * 'modsplit-Oct31_2011' of git://git.kernel.org/pub/scm/linux/kernel/git/paulg/linux: (230 commits)
    Revert "tracing: Include module.h in define_trace.h"
    irq: don't put module.h into irq.h for tracking irqgen modules.
    bluetooth: macroize two small inlines to avoid module.h
    ip_vs.h: fix implicit use of module_get/module_put from module.h
    nf_conntrack.h: fix up fallout from implicit moduleparam.h presence
    include: replace linux/module.h with "struct module" wherever possible
    include: convert various register fcns to macros to avoid include chaining
    crypto.h: remove unused crypto_tfm_alg_modname() inline
    uwb.h: fix implicit use of asm/page.h for PAGE_SIZE
    pm_runtime.h: explicitly requires notifier.h
    linux/dmaengine.h: fix implicit use of bitmap.h and asm/page.h
    miscdevice.h: fix up implicit use of lists and types
    stop_machine.h: fix implicit use of smp.h for smp_processor_id
    of: fix implicit use of errno.h in include/linux/of.h
    of_platform.h: delete needless include
    acpi: remove module.h include from platform/aclinux.h
    miscdevice.h: delete unnecessary inclusion of module.h
    device_cgroup.h: delete needless include
    net: sch_generic remove redundant use of
    net: inet_timewait_sock doesnt need
    ...

    Fix up trivial conflicts (other header files, and removal of the ab3550 mfd driver) in
    - drivers/media/dvb/frontends/dibx000_common.c
    - drivers/media/video/{mt9m111.c,ov6650.c}
    - drivers/mfd/ab3550-core.c
    - include/linux/dmaengine.h

    Linus Torvalds
     

01 Nov, 2011

4 commits


25 Oct, 2011

2 commits

  • * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1745 commits)
    dp83640: free packet queues on remove
    dp83640: use proper function to free transmit time stamping packets
    ipv6: Do not use routes from locally generated RAs
    [PATCH net-next] tg3: add tx_dropped counter
    be2net: don't create multiple RX/TX rings in multi channel mode
    be2net: don't create multiple TXQs in BE2
    be2net: refactor VF setup/teardown code into be_vf_setup/clear()
    be2net: add vlan/rx-mode/flow-control config to be_setup()
    net_sched: cls_flow: use skb_header_pointer()
    ipv4: avoid useless call of the function check_peer_pmtu
    TCP: remove TCP_DEBUG
    net: Fix driver name for mdio-gpio.c
    ipv4: tcp: fix TOS value in ACK messages sent from TIME_WAIT
    rtnetlink: Add missing manual netlink notification in dev_change_net_namespaces
    ipv4: fix ipsec forward performance regression
    jme: fix irq storm after suspend/resume
    route: fix ICMP redirect validation
    net: hold sock reference while processing tx timestamps
    tcp: md5: add more const attributes
    Add ethtool -g support to virtio_net
    ...

    Fix up conflicts in:
    - drivers/net/Kconfig:
    The split-up generated a trivial conflict with removal of a
    stale reference to Documentation/networking/net-modules.txt.
    Remove it from the new location instead.
    - fs/sysfs/dir.c:
    Fairly nasty conflicts with the sysfs rb-tree usage, conflicting
    with Eric Biederman's changes for tagged directories.

    Linus Torvalds
     
  • * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jikos/trivial: (59 commits)
    MAINTAINERS: linux-m32r is moderated for non-subscribers
    linux@lists.openrisc.net is moderated for non-subscribers
    Drop default from "DM365 codec select" choice
    parisc: Kconfig: cleanup Kernel page size default
    Kconfig: remove redundant CONFIG_ prefix on two symbols
    cris: remove arch/cris/arch-v32/lib/nand_init.S
    microblaze: add missing CONFIG_ prefixes
    h8300: drop puzzling Kconfig dependencies
    MAINTAINERS: microblaze-uclinux@itee.uq.edu.au is moderated for non-subscribers
    tty: drop superfluous dependency in Kconfig
    ARM: mxc: fix Kconfig typo 'i.MX51'
    Fix file references in Kconfig files
    aic7xxx: fix Kconfig references to READMEs
    Fix file references in drivers/ide/
    thinkpad_acpi: Fix printk typo 'bluestooth'
    bcmring: drop commented out line in Kconfig
    btmrvl_sdio: fix typo 'btmrvl_sdio_sd6888'
    doc: raw1394: Trivial typo fix
    CIFS: Don't free volume_info->UNC until we are entirely done with it.
    treewide: Correct spelling of successfully in comments
    ...

    Linus Torvalds
     

08 Oct, 2011

1 commit


30 Sep, 2011

1 commit

  • In the rds_iw_mr_pool struct the free_pinned field keeps track of
    memory pinned by free MRs. While this field is incremented properly
    upon allocation, it is never decremented upon unmapping. This would
    cause the rds_rdma module to crash the kernel upon unloading, by
    triggering the BUG_ON in the rds_iw_destroy_mr_pool function.

    This change keeps track of the MRs that become unpinned, so that
    free_pinned can be decremented appropriately.

    Signed-off-by: Jonathan Lallinger
    Signed-off-by: Steve Wise
    Signed-off-by: David S. Miller

    Jonathan Lallinger
     

16 Sep, 2011

1 commit

  • The functionality of xlist and llist is almost the same. This patch
    replaces xlist with llist to avoid code duplication.

    Known issues: don't know how to test this, need special hardware?

    Signed-off-by: Huang Ying
    Cc: Chris Mason
    Cc: Andy Grover
    Cc: "David S. Miller"
    Signed-off-by: David S. Miller

    Huang Ying
     

15 Sep, 2011

1 commit


26 Jul, 2011

1 commit

  • We presently define all kinds of notifiers in notifier.h. This is not
    necessary at all: different subsystems use different notifiers, and they
    are almost unrelated to each other.

    This can also save much build time. Suppose I add a new netdevice event,
    really I don't have to recompile all the source, just network related.
    Without this patch, all the source will be recompiled.

    I move the notify events near to their subsystem notifier registers, so
    that they can be found more easily.

    This patch:

    It is not necessary to share the same notifier.h.

    Signed-off-by: WANG Cong
    Cc: David Miller
    Cc: "Rafael J. Wysocki"
    Cc: Greg KH
    Signed-off-by: Andrew Morton
    Signed-off-by: Linus Torvalds

    Amerigo Wang
     

02 Jul, 2011

1 commit


17 Jun, 2011

1 commit


07 Jun, 2011

1 commit


26 May, 2011

1 commit

  • The RDMA CM currently infers the QP type from the port space selected
    by the user. In the future (eg with RDMA_PS_IB or XRC), there may not
    be a 1-1 correspondence between port space and QP type. For netlink
    export of RDMA CM state, we want to export the QP type to userspace,
    so it is cleaner to explicitly associate a QP type to an ID.

    Modify rdma_create_id() to allow the user to specify the QP type, and
    use it to make our selections of datagram versus connected mode.

    Signed-off-by: Sean Hefty
    Signed-off-by: Roland Dreier

    Sean Hefty
     

31 Mar, 2011

1 commit


24 Mar, 2011

2 commits

  • As a preparation for removing ext2 non-atomic bit operations from
    asm/bitops.h. This converts ext2 non-atomic bit operations to
    little-endian bit operations.

    Signed-off-by: Akinobu Mita
    Cc: Andy Grover
    Cc: "David S. Miller"
    Signed-off-by: Andrew Morton
    Signed-off-by: Linus Torvalds

    Akinobu Mita
     
  • asm-generic/bitops/le.h is only intended to be included directly from
    asm-generic/bitops/ext2-non-atomic.h or asm-generic/bitops/minix-le.h
    which implements generic ext2 or minix bit operations.

    This stops including asm-generic/bitops/le.h directly and uses ext2
    non-atomic bit operations instead.

    It seems odd to use ext2_*_bit() on rds, but it will be replaced with
    __{set,clear,test}_bit_le() after introducing little endian bit operations
    for all architectures. This indirect step is necessary to maintain
    bisectability for some architectures which have their own little-endian
    bit operations.

    Signed-off-by: Akinobu Mita
    Cc: Andy Grover
    Cc: "David S. Miller"
    Signed-off-by: Andrew Morton
    Signed-off-by: Linus Torvalds

    Akinobu Mita
     

17 Mar, 2011

1 commit

  • * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6: (1480 commits)
    bonding: enable netpoll without checking link status
    xfrm: Refcount destination entry on xfrm_lookup
    net: introduce rx_handler results and logic around that
    bonding: get rid of IFF_SLAVE_INACTIVE netdev->priv_flag
    bonding: wrap slave state work
    net: get rid of multiple bond-related netdevice->priv_flags
    bonding: register slave pointer for rx_handler
    be2net: Bump up the version number
    be2net: Copyright notice change. Update to Emulex instead of ServerEngines
    e1000e: fix kconfig for crc32 dependency
    netfilter ebtables: fix xt_AUDIT to work with ebtables
    xen network backend driver
    bonding: Improve syslog message at device creation time
    bonding: Call netif_carrier_off after register_netdevice
    bonding: Incorrect TX queue offset
    net_sched: fix ip_tos2prio
    xfrm: fix __xfrm_route_forward()
    be2net: Fix UDP packet detected status in RX compl
    Phonet: fix aligned-mode pipe socket buffer header reserve
    netxen: support for GbE port settings
    ...

    Fix up conflicts in drivers/staging/brcm80211/brcmsmac/wl_mac80211.c
    with the staging updates.

    Linus Torvalds
     

16 Mar, 2011

1 commit

  • * 'for-2.6.39' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq:
    workqueue: fix build failure introduced by s/freezeable/freezable/
    workqueue: add system_freezeable_wq
    rds/ib: use system_wq instead of rds_ib_fmr_wq
    net/9p: replace p9_poll_task with a work
    net/9p: use system_wq instead of p9_mux_wq
    xfs: convert to alloc_workqueue()
    reiserfs: make commit_wq use the default concurrency level
    ocfs2: use system_wq instead of ocfs2_quota_wq
    ext4: convert to alloc_workqueue()
    scsi/scsi_tgt_lib: scsi_tgtd isn't used in memory reclaim path
    scsi/be2iscsi,qla2xxx: convert to alloc_workqueue()
    misc/iwmc3200top: use system_wq instead of dedicated workqueues
    i2o: use alloc_workqueue() instead of create_workqueue()
    acpi: kacpi*_wq don't need WQ_MEM_RECLAIM
    fs/aio: aio_wq isn't used in memory reclaim path
    input/tps6507x-ts: use system_wq instead of dedicated workqueue
    cpufreq: use system_wq instead of dedicated workqueues
    wireless/ipw2x00: use system_wq instead of dedicated workqueues
    arm/omap: use system_wq in mailbox
    workqueue: use WQ_MEM_RECLAIM instead of WQ_RESCUER

    Linus Torvalds
     

11 Mar, 2011

1 commit


09 Mar, 2011

1 commit

  • Recently had this bug halt reported to me:

    kernel BUG at net/rds/send.c:329!
    Oops: Exception in kernel mode, sig: 5 [#1]
    SMP NR_CPUS=1024 NUMA pSeries
    Modules linked in: rds sunrpc ipv6 dm_mirror dm_region_hash dm_log ibmveth sg
    ext4 jbd2 mbcache sd_mod crc_t10dif ibmvscsic scsi_transport_srp scsi_tgt
    dm_mod [last unloaded: scsi_wait_scan]
    NIP: d000000003ca68f4 LR: d000000003ca67fc CTR: d000000003ca8770
    REGS: c000000175cab980 TRAP: 0700 Not tainted (2.6.32-118.el6.ppc64)
    MSR: 8000000000029032 CR: 44000022 XER: 00000000
    TASK = c00000017586ec90[1896] 'krdsd' THREAD: c000000175ca8000 CPU: 0
    GPR00: 0000000000000150 c000000175cabc00 d000000003cb7340 0000000000002030
    GPR04: ffffffffffffffff 0000000000000030 0000000000000000 0000000000000030
    GPR08: 0000000000000001 0000000000000001 c0000001756b1e30 0000000000010000
    GPR12: d000000003caac90 c000000000fa2500 c0000001742b2858 c0000001742b2a00
    GPR16: c0000001742b2a08 c0000001742b2820 0000000000000001 0000000000000001
    GPR20: 0000000000000040 c0000001742b2814 c000000175cabc70 0800000000000000
    GPR24: 0000000000000004 0200000000000000 0000000000000000 c0000001742b2860
    GPR28: 0000000000000000 c0000001756b1c80 d000000003cb68e8 c0000001742b27b8
    NIP [d000000003ca68f4] .rds_send_xmit+0x4c4/0x8a0 [rds]
    LR [d000000003ca67fc] .rds_send_xmit+0x3cc/0x8a0 [rds]
    Call Trace:
    [c000000175cabc00] [d000000003ca67fc] .rds_send_xmit+0x3cc/0x8a0 [rds]
    (unreliable)
    [c000000175cabd30] [d000000003ca7e64] .rds_send_worker+0x54/0x100 [rds]
    [c000000175cabdb0] [c0000000000b475c] .worker_thread+0x1dc/0x3c0
    [c000000175cabed0] [c0000000000baa9c] .kthread+0xbc/0xd0
    [c000000175cabf90] [c000000000032114] .kernel_thread+0x54/0x70
    Instruction dump:
    4bfffd50 60000000 60000000 39080001 935f004c f91f0040 41820024 813d017c
    7d094a78 7d290074 7929d182 394a0020 40e2ff68 4bffffa4 39200000
    Kernel panic - not syncing: Fatal exception
    Call Trace:
    [c000000175cab560] [c000000000012e04] .show_stack+0x74/0x1c0 (unreliable)
    [c000000175cab610] [c0000000005a365c] .panic+0x80/0x1b4
    [c000000175cab6a0] [c00000000002fbcc] .die+0x21c/0x2a0
    [c000000175cab750] [c000000000030000] ._exception+0x110/0x220
    [c000000175cab910] [c000000000004b9c] program_check_common+0x11c/0x180

    Signed-off-by: David S. Miller

    Neil Horman