Commit 8d9032bbe4671dc481261ccd4e161cd96e54b118

Authored by Daisuke HATAYAMA
Committed by Linus Torvalds
1 parent 93eb211e6c

elf coredump: add extended numbering support

The current ELF dumper implementation can produce broken corefiles if
the number of program headers exceeds 65535.  This number is determined by
the number of vmas which the process has.  In particular, some extreme
programs may use more than 65535 vmas.  (If you google max_map_count, you
can find some users facing this problem.)  This kind of program can never
generate correct coredumps.

This patch implements ``extended numbering'', which uses the sh_info field
of the first section header instead of the e_phnum field in order to
represent up to 4294967295 vmas.

This is supported by
AMD64-ABI(http://www.x86-64.org/documentation.html) and
Solaris(http://docs.sun.com/app/docs/doc/817-1984/).
Of course, we are preparing patches for gdb and binutils.

Signed-off-by: Daisuke HATAYAMA <d.hatayama@jp.fujitsu.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Greg Ungerer <gerg@snapgear.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 7 changed files with 187 additions and 6 deletions Side-by-side Diff

arch/ia64/kernel/elfcore.c
... ... @@ -62,4 +62,20 @@
62 62 }
63 63 return 1;
64 64 }
  65 +
  66 +size_t elf_core_extra_data_size(void)
  67 +{
  68 + const struct elf_phdr *const gate_phdrs =
  69 + (const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff);
  70 + int i;
  71 + size_t size = 0;
  72 +
  73 + for (i = 0; i < GATE_EHDR->e_phnum; ++i) {
  74 + if (gate_phdrs[i].p_type == PT_LOAD) {
  75 + size += PAGE_ALIGN(gate_phdrs[i].p_memsz);
  76 + break;
  77 + }
  78 + }
  79 + return size;
  80 +}
arch/um/sys-i386/elfcore.c
... ... @@ -65,4 +65,20 @@
65 65 }
66 66 return 1;
67 67 }
  68 +
  69 +size_t elf_core_extra_data_size(void)
  70 +{
  71 + if ( vsyscall_ehdr ) {
  72 + const struct elfhdr *const ehdrp =
  73 + (struct elfhdr *)vsyscall_ehdr;
  74 + const struct elf_phdr *const phdrp =
  75 + (const struct elf_phdr *) (vsyscall_ehdr + ehdrp->e_phoff);
  76 + int i;
  77 +
  78 + for (i = 0; i < ehdrp->e_phnum; ++i)
  79 + if (phdrp[i].p_type == PT_LOAD)
  80 + return (size_t) phdrp[i].p_filesz;
  81 + }
  82 + return 0;
  83 +}
... ... @@ -1838,6 +1838,34 @@
1838 1838 return gate_vma;
1839 1839 }
1840 1840  
  1841 +static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
  1842 + elf_addr_t e_shoff, int segs)
  1843 +{
  1844 + elf->e_shoff = e_shoff;
  1845 + elf->e_shentsize = sizeof(*shdr4extnum);
  1846 + elf->e_shnum = 1;
  1847 + elf->e_shstrndx = SHN_UNDEF;
  1848 +
  1849 + memset(shdr4extnum, 0, sizeof(*shdr4extnum));
  1850 +
  1851 + shdr4extnum->sh_type = SHT_NULL;
  1852 + shdr4extnum->sh_size = elf->e_shnum;
  1853 + shdr4extnum->sh_link = elf->e_shstrndx;
  1854 + shdr4extnum->sh_info = segs;
  1855 +}
  1856 +
  1857 +static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
  1858 + unsigned long mm_flags)
  1859 +{
  1860 + struct vm_area_struct *vma;
  1861 + size_t size = 0;
  1862 +
  1863 + for (vma = first_vma(current, gate_vma); vma != NULL;
  1864 + vma = next_vma(vma, gate_vma))
  1865 + size += vma_dump_size(vma, mm_flags);
  1866 + return size;
  1867 +}
  1868 +
1841 1869 /*
1842 1870 * Actual dumper
1843 1871 *
... ... @@ -1857,6 +1885,9 @@
1857 1885 unsigned long mm_flags;
1858 1886 struct elf_note_info info;
1859 1887 struct elf_phdr *phdr4note = NULL;
  1888 + struct elf_shdr *shdr4extnum = NULL;
  1889 + Elf_Half e_phnum;
  1890 + elf_addr_t e_shoff;
1860 1891  
1861 1892 /*
1862 1893 * We no longer stop all VM operations.
1863 1894  
... ... @@ -1885,12 +1916,19 @@
1885 1916 if (gate_vma != NULL)
1886 1917 segs++;
1887 1918  
  1919 + /* for notes section */
  1920 + segs++;
  1921 +
  1922 + /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
  1923 + * this, kernel supports extended numbering. Have a look at
  1924 + * include/linux/elf.h for further information. */
  1925 + e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
  1926 +
1888 1927 /*
1889 1928 * Collect all the non-memory information about the process for the
1890 1929 * notes. This also sets up the file header.
1891 1930 */
1892   - if (!fill_note_info(elf, segs + 1, /* including notes section */
1893   - &info, cprm->signr, cprm->regs))
  1931 + if (!fill_note_info(elf, e_phnum, &info, cprm->signr, cprm->regs))
1894 1932 goto cleanup;
1895 1933  
1896 1934 has_dumped = 1;
... ... @@ -1900,7 +1938,7 @@
1900 1938 set_fs(KERNEL_DS);
1901 1939  
1902 1940 offset += sizeof(*elf); /* Elf header */
1903   - offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
  1941 + offset += segs * sizeof(struct elf_phdr); /* Program headers */
1904 1942 foffset = offset;
1905 1943  
1906 1944 /* Write notes phdr entry */
... ... @@ -1926,6 +1964,19 @@
1926 1964 */
1927 1965 mm_flags = current->mm->flags;
1928 1966  
  1967 + offset += elf_core_vma_data_size(gate_vma, mm_flags);
  1968 + offset += elf_core_extra_data_size();
  1969 + e_shoff = offset;
  1970 +
  1971 + if (e_phnum == PN_XNUM) {
  1972 + shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
  1973 + if (!shdr4extnum)
  1974 + goto end_coredump;
  1975 + fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
  1976 + }
  1977 +
  1978 + offset = dataoff;
  1979 +
1929 1980 size += sizeof(*elf);
1930 1981 if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
1931 1982 goto end_coredump;
1932 1983  
... ... @@ -2003,11 +2054,20 @@
2003 2054 if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
2004 2055 goto end_coredump;
2005 2056  
  2057 + if (e_phnum == PN_XNUM) {
  2058 + size += sizeof(*shdr4extnum);
  2059 + if (size > cprm->limit
  2060 + || !dump_write(cprm->file, shdr4extnum,
  2061 + sizeof(*shdr4extnum)))
  2062 + goto end_coredump;
  2063 + }
  2064 +
2006 2065 end_coredump:
2007 2066 set_fs(fs);
2008 2067  
2009 2068 cleanup:
2010 2069 free_note_info(&info);
  2070 + kfree(shdr4extnum);
2011 2071 kfree(phdr4note);
2012 2072 kfree(elf);
2013 2073 out:
fs/binfmt_elf_fdpic.c
... ... @@ -1505,6 +1505,22 @@
1505 1505 return sz;
1506 1506 }
1507 1507  
  1508 +static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
  1509 + elf_addr_t e_shoff, int segs)
  1510 +{
  1511 + elf->e_shoff = e_shoff;
  1512 + elf->e_shentsize = sizeof(*shdr4extnum);
  1513 + elf->e_shnum = 1;
  1514 + elf->e_shstrndx = SHN_UNDEF;
  1515 +
  1516 + memset(shdr4extnum, 0, sizeof(*shdr4extnum));
  1517 +
  1518 + shdr4extnum->sh_type = SHT_NULL;
  1519 + shdr4extnum->sh_size = elf->e_shnum;
  1520 + shdr4extnum->sh_link = elf->e_shstrndx;
  1521 + shdr4extnum->sh_info = segs;
  1522 +}
  1523 +
1508 1524 /*
1509 1525 * dump the segments for an MMU process
1510 1526 */
... ... @@ -1569,6 +1585,17 @@
1569 1585 }
1570 1586 #endif
1571 1587  
  1588 +static size_t elf_core_vma_data_size(unsigned long mm_flags)
  1589 +{
  1590 + struct vm_area_struct *vma;
  1591 + size_t size = 0;
  1592 +
  1593 + for (vma = current->mm->mmap; vma; vma->vm_next)
  1594 + if (maydump(vma, mm_flags))
  1595 + size += vma->vm_end - vma->vm_start;
  1596 + return size;
  1597 +}
  1598 +
1572 1599 /*
1573 1600 * Actual dumper
1574 1601 *
... ... @@ -1601,6 +1628,9 @@
1601 1628 elf_addr_t *auxv;
1602 1629 unsigned long mm_flags;
1603 1630 struct elf_phdr *phdr4note = NULL;
  1631 + struct elf_shdr *shdr4extnum = NULL;
  1632 + Elf_Half e_phnum;
  1633 + elf_addr_t e_shoff;
1604 1634  
1605 1635 /*
1606 1636 * We no longer stop all VM operations.
1607 1637  
... ... @@ -1667,8 +1697,16 @@
1667 1697 segs = current->mm->map_count;
1668 1698 segs += elf_core_extra_phdrs();
1669 1699  
  1700 + /* for notes section */
  1701 + segs++;
  1702 +
  1703 + /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
  1704 + * this, kernel supports extended numbering. Have a look at
  1705 + * include/linux/elf.h for further information. */
  1706 + e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
  1707 +
1670 1708 /* Set up header */
1671   - fill_elf_fdpic_header(elf, segs + 1); /* including notes section */
  1709 + fill_elf_fdpic_header(elf, e_phnum);
1672 1710  
1673 1711 has_dumped = 1;
1674 1712 current->flags |= PF_DUMPCORE;
... ... @@ -1708,7 +1746,7 @@
1708 1746 set_fs(KERNEL_DS);
1709 1747  
1710 1748 offset += sizeof(*elf); /* Elf header */
1711   - offset += (segs+1) * sizeof(struct elf_phdr); /* Program headers */
  1749 + offset += segs * sizeof(struct elf_phdr); /* Program headers */
1712 1750 foffset = offset;
1713 1751  
1714 1752 /* Write notes phdr entry */
... ... @@ -1738,6 +1776,19 @@
1738 1776 */
1739 1777 mm_flags = current->mm->flags;
1740 1778  
  1779 + offset += elf_core_vma_data_size(mm_flags);
  1780 + offset += elf_core_extra_data_size();
  1781 + e_shoff = offset;
  1782 +
  1783 + if (e_phnum == PN_XNUM) {
  1784 + shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
  1785 + if (!shdr4extnum)
  1786 + goto end_coredump;
  1787 + fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
  1788 + }
  1789 +
  1790 + offset = dataoff;
  1791 +
1741 1792 size += sizeof(*elf);
1742 1793 if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
1743 1794 goto end_coredump;
... ... @@ -1801,6 +1852,14 @@
1801 1852  
1802 1853 if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
1803 1854 goto end_coredump;
  1855 +
  1856 + if (e_phnum == PN_XNUM) {
  1857 + size += sizeof(*shdr4extnum);
  1858 + if (size > cprm->limit
  1859 + || !dump_write(cprm->file, shdr4extnum,
  1860 + sizeof(*shdr4extnum)))
  1861 + goto end_coredump;
  1862 + }
1804 1863  
1805 1864 if (cprm->file->f_pos != offset) {
1806 1865 /* Sanity check */
... ... @@ -50,6 +50,28 @@
50 50  
51 51 #define PT_GNU_STACK (PT_LOOS + 0x474e551)
52 52  
  53 +/*
  54 + * Extended Numbering
  55 + *
  56 + * If the real number of program header table entries is larger than
  57 + * or equal to PN_XNUM(0xffff), it is set to sh_info field of the
  58 + * section header at index 0, and PN_XNUM is set to e_phnum
  59 + * field. Otherwise, the section header at index 0 is zero
  60 + * initialized, if it exists.
  61 + *
  62 + * Specifications are available in:
  63 + *
  64 + * - Sun microsystems: Linker and Libraries.
  65 + * Part No: 817-1984-17, September 2008.
  66 + * URL: http://docs.sun.com/app/docs/doc/817-1984
  67 + *
  68 + * - System V ABI AMD64 Architecture Processor Supplement
  69 + * Draft Version 0.99.,
  70 + * May 11, 2009.
  71 + * URL: http://www.x86-64.org/
  72 + */
  73 +#define PN_XNUM 0xffff
  74 +
53 75 /* These constants define the different elf file types */
54 76 #define ET_NONE 0
55 77 #define ET_REL 1
... ... @@ -286,7 +308,7 @@
286 308 #define SHN_COMMON 0xfff2
287 309 #define SHN_HIRESERVE 0xffff
288 310  
289   -typedef struct {
  311 +typedef struct elf32_shdr {
290 312 Elf32_Word sh_name;
291 313 Elf32_Word sh_type;
292 314 Elf32_Word sh_flags;
... ... @@ -394,6 +416,7 @@
394 416 extern Elf32_Dyn _DYNAMIC [];
395 417 #define elfhdr elf32_hdr
396 418 #define elf_phdr elf32_phdr
  419 +#define elf_shdr elf32_shdr
397 420 #define elf_note elf32_note
398 421 #define elf_addr_t Elf32_Off
399 422 #define Elf_Half Elf32_Half
... ... @@ -403,6 +426,7 @@
403 426 extern Elf64_Dyn _DYNAMIC [];
404 427 #define elfhdr elf64_hdr
405 428 #define elf_phdr elf64_phdr
  429 +#define elf_shdr elf64_shdr
406 430 #define elf_note elf64_note
407 431 #define elf_addr_t Elf64_Off
408 432 #define Elf_Half Elf64_Half
include/linux/elfcore.h
... ... @@ -166,6 +166,7 @@
166 166 unsigned long limit);
167 167 extern int
168 168 elf_core_write_extra_data(struct file *file, size_t *size, unsigned long limit);
  169 +extern size_t elf_core_extra_data_size(void);
169 170  
170 171 #endif /* _LINUX_ELFCORE_H */
... ... @@ -21,4 +21,9 @@
21 21 {
22 22 return 1;
23 23 }
  24 +
  25 +size_t __weak elf_core_extra_data_size(void)
  26 +{
  27 + return 0;
  28 +}