Commit 23dcfa61bac244e1200ff9ad19c6e9144dcb6bb5
Exists in smarc-l5.0.0_1.0.0-ga and in 5 other branches
Merge branch 'akpm' (Andrew's patch-bomb)
Merge fixes from Andrew Morton.

Random drivers and some VM fixes.

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (17 commits)
  mm: compaction: Abort async compaction if locks are contended or taking too long
  mm: have order > 0 compaction start near a pageblock with free pages
  rapidio/tsi721: fix unused variable compiler warning
  rapidio/tsi721: fix inbound doorbell interrupt handling
  drivers/rtc/rtc-rs5c348.c: fix hour decoding in 12-hour mode
  mm: correct page->pfmemalloc to fix deactivate_slab regression
  drivers/rtc/rtc-pcf2123.c: initialize dynamic sysfs attributes
  mm/compaction.c: fix deferring compaction mistake
  drivers/misc/sgi-xp/xpc_uv.c: SGI XPC fails to load when cpu 0 is out of IRQ resources
  string: do not export memweight() to userspace
  hugetlb: update hugetlbpage.txt
  checkpatch: add control statement test to SINGLE_STATEMENT_DO_WHILE_MACRO
  mm: hugetlbfs: correctly populate shared pmd
  cciss: fix incorrect scsi status reporting
  Documentation: update mount option in filesystem/vfat.txt
  mm: change nr_ptes BUG_ON to WARN_ON
  cs5535-clockevt: typo, it's MFGPT, not MFPGT
16 changed files:
- Documentation/filesystems/vfat.txt
- Documentation/vm/hugetlbpage.txt
- arch/x86/mm/hugetlbpage.c
- drivers/block/cciss_scsi.c
- drivers/clocksource/cs5535-clockevt.c
- drivers/misc/sgi-xp/xpc_uv.c
- drivers/rapidio/devices/tsi721.c
- drivers/rtc/rtc-pcf2123.c
- drivers/rtc/rtc-rs5c348.c
- include/linux/compaction.h
- include/linux/string.h
- mm/compaction.c
- mm/internal.h
- mm/mmap.c
- mm/page_alloc.c
- scripts/checkpatch.pl
Documentation/filesystems/vfat.txt
... | ... | @@ -137,6 +137,17 @@ |
137 | 137 | without doing anything or remount the partition in |
138 | 138 | read-only mode (default behavior). |
139 | 139 | |
140 | +discard -- If set, issues discard/TRIM commands to the block | |
141 | + device when blocks are freed. This is useful for SSD devices | |
142 | + and sparse/thinly-provisoned LUNs. | |
143 | + | |
144 | +nfs -- This option maintains an index (cache) of directory | |
145 | + inodes by i_logstart which is used by the nfs-related code to | |
146 | + improve look-ups. | |
147 | + | |
148 | + Enable this only if you want to export the FAT filesystem | |
149 | + over NFS | |
150 | + | |
140 | 151 | <bool>: 0,1,yes,no,true,false |
141 | 152 | |
142 | 153 | TODO |
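The hunk above documents two vfat mount options, "discard" and "nfs". As a rough sketch of how they would be passed from userspace (not part of this commit; the device and mount point are placeholders), a minimal mount(2) call could look like this:

#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	/* "discard" issues TRIM for freed clusters; "nfs" builds the
	 * i_logstart index described above, for exporting over NFS. */
	if (mount("/dev/sdb1", "/mnt/flash", "vfat", 0, "discard,nfs")) {
		perror("mount");
		return 1;
	}
	return 0;
}
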
Documentation/vm/hugetlbpage.txt
... | ... | @@ -299,12 +299,18 @@ |
299 | 299 | ******************************************************************* |
300 | 300 | |
301 | 301 | /* |
302 | - * hugepage-shm: see Documentation/vm/hugepage-shm.c | |
302 | + * map_hugetlb: see tools/testing/selftests/vm/map_hugetlb.c | |
303 | 303 | */ |
304 | 304 | |
305 | 305 | ******************************************************************* |
306 | 306 | |
307 | 307 | /* |
308 | - * hugepage-mmap: see Documentation/vm/hugepage-mmap.c | |
308 | + * hugepage-shm: see tools/testing/selftests/vm/hugepage-shm.c | |
309 | + */ | |
310 | + | |
311 | +******************************************************************* | |
312 | + | |
313 | +/* | |
314 | + * hugepage-mmap: see tools/testing/selftests/vm/hugepage-mmap.c | |
309 | 315 | */ |
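The hunk above only redirects readers to the selftests copies of the example programs. For orientation, a stripped-down sketch in the spirit of map_hugetlb.c (an assumption-laden reduction, not the selftest itself; it needs huge pages reserved via /proc/sys/vm/nr_hugepages):

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

#define LENGTH (2UL * 1024 * 1024)	/* one 2 MB huge page; size is arch-dependent */

int main(void)
{
	void *addr = mmap(NULL, LENGTH, PROT_READ | PROT_WRITE,
			  MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
	if (addr == MAP_FAILED) {
		perror("mmap");		/* usually means no huge pages reserved */
		return 1;
	}
	memset(addr, 0, LENGTH);	/* touch the mapping */
	munmap(addr, LENGTH);
	return 0;
}
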
arch/x86/mm/hugetlbpage.c
... | ... | @@ -56,9 +56,16 @@ |
56 | 56 | } |
57 | 57 | |
58 | 58 | /* |
59 | - * search for a shareable pmd page for hugetlb. | |
59 | + * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc() | |
60 | + * and returns the corresponding pte. While this is not necessary for the | |
61 | + * !shared pmd case because we can allocate the pmd later as well, it makes the | |
62 | + * code much cleaner. pmd allocation is essential for the shared case because | |
63 | + * pud has to be populated inside the same i_mmap_mutex section - otherwise | |
64 | + * racing tasks could either miss the sharing (see huge_pte_offset) or select a | |
65 | + * bad pmd for sharing. | |
60 | 66 | */ |
61 | -static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) | |
67 | +static pte_t * | |
68 | +huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) | |
62 | 69 | { |
63 | 70 | struct vm_area_struct *vma = find_vma(mm, addr); |
64 | 71 | struct address_space *mapping = vma->vm_file->f_mapping; |
65 | 72 | |
... | ... | @@ -68,9 +75,10 @@ |
68 | 75 | struct vm_area_struct *svma; |
69 | 76 | unsigned long saddr; |
70 | 77 | pte_t *spte = NULL; |
78 | + pte_t *pte; | |
71 | 79 | |
72 | 80 | if (!vma_shareable(vma, addr)) |
73 | - return; | |
81 | + return (pte_t *)pmd_alloc(mm, pud, addr); | |
74 | 82 | |
75 | 83 | mutex_lock(&mapping->i_mmap_mutex); |
76 | 84 | vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) { |
77 | 85 | |
... | ... | @@ -97,7 +105,9 @@ |
97 | 105 | put_page(virt_to_page(spte)); |
98 | 106 | spin_unlock(&mm->page_table_lock); |
99 | 107 | out: |
108 | + pte = (pte_t *)pmd_alloc(mm, pud, addr); | |
100 | 109 | mutex_unlock(&mapping->i_mmap_mutex); |
110 | + return pte; | |
101 | 111 | } |
102 | 112 | |
103 | 113 | /* |
... | ... | @@ -142,8 +152,9 @@ |
142 | 152 | } else { |
143 | 153 | BUG_ON(sz != PMD_SIZE); |
144 | 154 | if (pud_none(*pud)) |
145 | - huge_pmd_share(mm, addr, pud); | |
146 | - pte = (pte_t *) pmd_alloc(mm, pud, addr); | |
155 | + pte = huge_pmd_share(mm, addr, pud); | |
156 | + else | |
157 | + pte = (pte_t *)pmd_alloc(mm, pud, addr); | |
147 | 158 | } |
148 | 159 | } |
149 | 160 | BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte)); |
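The enlarged comment above is the heart of this fix: the pmd must be allocated and the pud populated inside the same i_mmap_mutex section, or two racing faults can each miss the other's shareable pmd. A userspace analogue of that "look up and install under one lock" rule, using pthreads rather than kernel locking (all names below are invented for illustration):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t map_lock = PTHREAD_MUTEX_INITIALIZER;
static int *shared_table;			/* stands in for the shared pmd page */

static int *get_shared_table(void)
{
	int *table;

	pthread_mutex_lock(&map_lock);
	if (!shared_table)			/* "search for a shareable pmd" */
		shared_table = calloc(512, sizeof(*shared_table));
	table = shared_table;			/* install it before dropping the lock */
	pthread_mutex_unlock(&map_lock);
	return table;
}

static void *worker(void *arg)
{
	printf("thread %ld sees table %p\n", (long)arg, (void *)get_shared_table());
	return NULL;
}

int main(void)
{
	pthread_t t[2];

	for (long i = 0; i < 2; i++)
		pthread_create(&t[i], NULL, worker, (void *)i);
	for (int i = 0; i < 2; i++)
		pthread_join(t[i], NULL);
	return 0;	/* both threads print the same pointer */
}
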
drivers/block/cciss_scsi.c
... | ... | @@ -763,16 +763,7 @@ |
763 | 763 | { |
764 | 764 | case CMD_TARGET_STATUS: |
765 | 765 | /* Pass it up to the upper layers... */ |
766 | - if( ei->ScsiStatus) | |
767 | - { | |
768 | -#if 0 | |
769 | - printk(KERN_WARNING "cciss: cmd %p " | |
770 | - "has SCSI Status = %x\n", | |
771 | - c, ei->ScsiStatus); | |
772 | -#endif | |
773 | - cmd->result |= (ei->ScsiStatus << 1); | |
774 | - } | |
775 | - else { /* scsi status is zero??? How??? */ | |
766 | + if (!ei->ScsiStatus) { | |
776 | 767 | |
777 | 768 | /* Ordinarily, this case should never happen, but there is a bug |
778 | 769 | in some released firmware revisions that allows it to happen |
drivers/clocksource/cs5535-clockevt.c
... | ... | @@ -53,7 +53,7 @@ |
53 | 53 | #define MFGPT_PERIODIC (MFGPT_HZ / HZ) |
54 | 54 | |
55 | 55 | /* |
56 | - * The MFPGT timers on the CS5536 provide us with suitable timers to use | |
56 | + * The MFGPT timers on the CS5536 provide us with suitable timers to use | |
57 | 57 | * as clock event sources - not as good as a HPET or APIC, but certainly |
58 | 58 | * better than the PIT. This isn't a general purpose MFGPT driver, but |
59 | 59 | * a simplified one designed specifically to act as a clock event source. |
... | ... | @@ -144,7 +144,7 @@ |
144 | 144 | |
145 | 145 | timer = cs5535_mfgpt_alloc_timer(MFGPT_TIMER_ANY, MFGPT_DOMAIN_WORKING); |
146 | 146 | if (!timer) { |
147 | - printk(KERN_ERR DRV_NAME ": Could not allocate MFPGT timer\n"); | |
147 | + printk(KERN_ERR DRV_NAME ": Could not allocate MFGPT timer\n"); | |
148 | 148 | return -ENODEV; |
149 | 149 | } |
150 | 150 | cs5535_event_clock = timer; |
drivers/misc/sgi-xp/xpc_uv.c
... | ... | @@ -18,6 +18,8 @@ |
18 | 18 | #include <linux/interrupt.h> |
19 | 19 | #include <linux/delay.h> |
20 | 20 | #include <linux/device.h> |
21 | +#include <linux/cpu.h> | |
22 | +#include <linux/module.h> | |
21 | 23 | #include <linux/err.h> |
22 | 24 | #include <linux/slab.h> |
23 | 25 | #include <asm/uv/uv_hub.h> |
... | ... | @@ -59,6 +61,8 @@ |
59 | 61 | XPC_NOTIFY_MSG_SIZE_UV) |
60 | 62 | #define XPC_NOTIFY_IRQ_NAME "xpc_notify" |
61 | 63 | |
64 | +static int xpc_mq_node = -1; | |
65 | + | |
62 | 66 | static struct xpc_gru_mq_uv *xpc_activate_mq_uv; |
63 | 67 | static struct xpc_gru_mq_uv *xpc_notify_mq_uv; |
64 | 68 | |
65 | 69 | |
... | ... | @@ -109,11 +113,8 @@ |
109 | 113 | #if defined CONFIG_X86_64 |
110 | 114 | mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset, |
111 | 115 | UV_AFFINITY_CPU); |
112 | - if (mq->irq < 0) { | |
113 | - dev_err(xpc_part, "uv_setup_irq() returned error=%d\n", | |
114 | - -mq->irq); | |
116 | + if (mq->irq < 0) | |
115 | 117 | return mq->irq; |
116 | - } | |
117 | 118 | |
118 | 119 | mq->mmr_value = uv_read_global_mmr64(mmr_pnode, mq->mmr_offset); |
119 | 120 | |
... | ... | @@ -238,8 +239,9 @@ |
238 | 239 | mq->mmr_blade = uv_cpu_to_blade_id(cpu); |
239 | 240 | |
240 | 241 | nid = cpu_to_node(cpu); |
241 | - page = alloc_pages_exact_node(nid, GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, | |
242 | - pg_order); | |
242 | + page = alloc_pages_exact_node(nid, | |
243 | + GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, | |
244 | + pg_order); | |
243 | 245 | if (page == NULL) { |
244 | 246 | dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to alloc %d " |
245 | 247 | "bytes of memory on nid=%d for GRU mq\n", mq_size, nid); |
246 | 248 | |
... | ... | @@ -1731,9 +1733,50 @@ |
1731 | 1733 | .notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_uv, |
1732 | 1734 | }; |
1733 | 1735 | |
1736 | +static int | |
1737 | +xpc_init_mq_node(int nid) | |
1738 | +{ | |
1739 | + int cpu; | |
1740 | + | |
1741 | + get_online_cpus(); | |
1742 | + | |
1743 | + for_each_cpu(cpu, cpumask_of_node(nid)) { | |
1744 | + xpc_activate_mq_uv = | |
1745 | + xpc_create_gru_mq_uv(XPC_ACTIVATE_MQ_SIZE_UV, nid, | |
1746 | + XPC_ACTIVATE_IRQ_NAME, | |
1747 | + xpc_handle_activate_IRQ_uv); | |
1748 | + if (!IS_ERR(xpc_activate_mq_uv)) | |
1749 | + break; | |
1750 | + } | |
1751 | + if (IS_ERR(xpc_activate_mq_uv)) { | |
1752 | + put_online_cpus(); | |
1753 | + return PTR_ERR(xpc_activate_mq_uv); | |
1754 | + } | |
1755 | + | |
1756 | + for_each_cpu(cpu, cpumask_of_node(nid)) { | |
1757 | + xpc_notify_mq_uv = | |
1758 | + xpc_create_gru_mq_uv(XPC_NOTIFY_MQ_SIZE_UV, nid, | |
1759 | + XPC_NOTIFY_IRQ_NAME, | |
1760 | + xpc_handle_notify_IRQ_uv); | |
1761 | + if (!IS_ERR(xpc_notify_mq_uv)) | |
1762 | + break; | |
1763 | + } | |
1764 | + if (IS_ERR(xpc_notify_mq_uv)) { | |
1765 | + xpc_destroy_gru_mq_uv(xpc_activate_mq_uv); | |
1766 | + put_online_cpus(); | |
1767 | + return PTR_ERR(xpc_notify_mq_uv); | |
1768 | + } | |
1769 | + | |
1770 | + put_online_cpus(); | |
1771 | + return 0; | |
1772 | +} | |
1773 | + | |
1734 | 1774 | int |
1735 | 1775 | xpc_init_uv(void) |
1736 | 1776 | { |
1777 | + int nid; | |
1778 | + int ret = 0; | |
1779 | + | |
1737 | 1780 | xpc_arch_ops = xpc_arch_ops_uv; |
1738 | 1781 | |
1739 | 1782 | if (sizeof(struct xpc_notify_mq_msghdr_uv) > XPC_MSG_HDR_MAX_SIZE) { |
1740 | 1783 | |
1741 | 1784 | |
... | ... | @@ -1742,21 +1785,21 @@ |
1742 | 1785 | return -E2BIG; |
1743 | 1786 | } |
1744 | 1787 | |
1745 | - xpc_activate_mq_uv = xpc_create_gru_mq_uv(XPC_ACTIVATE_MQ_SIZE_UV, 0, | |
1746 | - XPC_ACTIVATE_IRQ_NAME, | |
1747 | - xpc_handle_activate_IRQ_uv); | |
1748 | - if (IS_ERR(xpc_activate_mq_uv)) | |
1749 | - return PTR_ERR(xpc_activate_mq_uv); | |
1788 | + if (xpc_mq_node < 0) | |
1789 | + for_each_online_node(nid) { | |
1790 | + ret = xpc_init_mq_node(nid); | |
1750 | 1791 | |
1751 | - xpc_notify_mq_uv = xpc_create_gru_mq_uv(XPC_NOTIFY_MQ_SIZE_UV, 0, | |
1752 | - XPC_NOTIFY_IRQ_NAME, | |
1753 | - xpc_handle_notify_IRQ_uv); | |
1754 | - if (IS_ERR(xpc_notify_mq_uv)) { | |
1755 | - xpc_destroy_gru_mq_uv(xpc_activate_mq_uv); | |
1756 | - return PTR_ERR(xpc_notify_mq_uv); | |
1757 | - } | |
1792 | + if (!ret) | |
1793 | + break; | |
1794 | + } | |
1795 | + else | |
1796 | + ret = xpc_init_mq_node(xpc_mq_node); | |
1758 | 1797 | |
1759 | - return 0; | |
1798 | + if (ret < 0) | |
1799 | + dev_err(xpc_part, "xpc_init_mq_node() returned error=%d\n", | |
1800 | + -ret); | |
1801 | + | |
1802 | + return ret; | |
1760 | 1803 | } |
1761 | 1804 | |
1762 | 1805 | void |
... | ... | @@ -1765,4 +1808,7 @@ |
1765 | 1808 | xpc_destroy_gru_mq_uv(xpc_notify_mq_uv); |
1766 | 1809 | xpc_destroy_gru_mq_uv(xpc_activate_mq_uv); |
1767 | 1810 | } |
1811 | + | |
1812 | +module_param(xpc_mq_node, int, 0); | |
1813 | +MODULE_PARM_DESC(xpc_mq_node, "Node number on which to allocate message queues."); |
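The new xpc_init_mq_node() stops assuming that cpu 0 always has spare IRQ resources: xpc_init_uv() now walks the online nodes (or the node given by the new xpc_mq_node module parameter) and keeps the first one on which both message queues can be created. A plain-C sketch of that first-success fallback pattern (the resource type and candidates are invented for illustration):

#include <errno.h>
#include <stdio.h>

static int try_setup(int candidate)
{
	/* Pretend only candidate 2 has the resources we need. */
	return candidate == 2 ? 0 : -EBUSY;
}

static int setup_first_available(const int *candidates, int n)
{
	int ret = -ENODEV;

	for (int i = 0; i < n; i++) {
		ret = try_setup(candidates[i]);
		if (!ret) {
			printf("using candidate %d\n", candidates[i]);
			break;
		}
	}
	return ret;	/* 0 on success, last error otherwise */
}

int main(void)
{
	int nodes[] = { 0, 1, 2, 3 };

	return setup_first_available(nodes, 4) ? 1 : 0;
}
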
drivers/rapidio/devices/tsi721.c
... | ... | @@ -435,6 +435,9 @@ |
435 | 435 | " info %4.4x\n", DBELL_SID(idb.bytes), |
436 | 436 | DBELL_TID(idb.bytes), DBELL_INF(idb.bytes)); |
437 | 437 | } |
438 | + | |
439 | + wr_ptr = ioread32(priv->regs + | |
440 | + TSI721_IDQ_WP(IDB_QUEUE)) % IDB_QSIZE; | |
438 | 441 | } |
439 | 442 | |
440 | 443 | iowrite32(rd_ptr & (IDB_QSIZE - 1), |
... | ... | @@ -445,6 +448,10 @@ |
445 | 448 | regval |= TSI721_SR_CHINT_IDBQRCV; |
446 | 449 | iowrite32(regval, |
447 | 450 | priv->regs + TSI721_SR_CHINTE(IDB_QUEUE)); |
451 | + | |
452 | + wr_ptr = ioread32(priv->regs + TSI721_IDQ_WP(IDB_QUEUE)) % IDB_QSIZE; | |
453 | + if (wr_ptr != rd_ptr) | |
454 | + schedule_work(&priv->idb_work); | |
448 | 455 | } |
449 | 456 | |
450 | 457 | /** |
... | ... | @@ -2212,7 +2219,7 @@ |
2212 | 2219 | const struct pci_device_id *id) |
2213 | 2220 | { |
2214 | 2221 | struct tsi721_device *priv; |
2215 | - int i, cap; | |
2222 | + int cap; | |
2216 | 2223 | int err; |
2217 | 2224 | u32 regval; |
2218 | 2225 | |
2219 | 2226 | |
... | ... | @@ -2232,11 +2239,14 @@ |
2232 | 2239 | priv->pdev = pdev; |
2233 | 2240 | |
2234 | 2241 | #ifdef DEBUG |
2242 | + { | |
2243 | + int i; | |
2235 | 2244 | for (i = 0; i <= PCI_STD_RESOURCE_END; i++) { |
2236 | 2245 | dev_dbg(&pdev->dev, "res[%d] @ 0x%llx (0x%lx, 0x%lx)\n", |
2237 | 2246 | i, (unsigned long long)pci_resource_start(pdev, i), |
2238 | 2247 | (unsigned long)pci_resource_len(pdev, i), |
2239 | 2248 | pci_resource_flags(pdev, i)); |
2249 | + } | |
2240 | 2250 | } |
2241 | 2251 | #endif |
2242 | 2252 | /* |
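The doorbell fix above re-reads the inbound queue write pointer after draining, and again after re-enabling the interrupt, scheduling another pass if new doorbells arrived in the meantime; without that, a doorbell landing during processing could sit unserviced until the next interrupt. A userspace analogue of the re-check-after-drain pattern (ring layout and names invented):

#include <stdbool.h>
#include <stdio.h>

#define QSIZE 32

struct ring {
	unsigned int rd, wr;		/* consumer / producer indices */
	int entries[QSIZE];
};

static unsigned int read_wr(const struct ring *r)
{
	return r->wr % QSIZE;		/* stands in for ioread32() of the WP register */
}

static bool drain(struct ring *r)
{
	unsigned int wr = read_wr(r);

	while (r->rd != wr) {
		printf("doorbell %d\n", r->entries[r->rd]);
		r->rd = (r->rd + 1) % QSIZE;
		wr = read_wr(r);	/* pick up entries queued while draining */
	}
	/* Final re-check, like the one after re-enabling the interrupt in the
	 * driver: if the producer moved again, ask for another pass. */
	return read_wr(r) != r->rd;
}

int main(void)
{
	struct ring r = { .rd = 0, .wr = 3, .entries = { 10, 11, 12 } };

	printf("reschedule: %s\n", drain(&r) ? "yes" : "no");
	return 0;
}
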
drivers/rtc/rtc-pcf2123.c
... | ... | @@ -43,6 +43,7 @@ |
43 | 43 | #include <linux/rtc.h> |
44 | 44 | #include <linux/spi/spi.h> |
45 | 45 | #include <linux/module.h> |
46 | +#include <linux/sysfs.h> | |
46 | 47 | |
47 | 48 | #define DRV_VERSION "0.6" |
48 | 49 | |
... | ... | @@ -292,6 +293,7 @@ |
292 | 293 | pdata->rtc = rtc; |
293 | 294 | |
294 | 295 | for (i = 0; i < 16; i++) { |
296 | + sysfs_attr_init(&pdata->regs[i].attr.attr); | |
295 | 297 | sprintf(pdata->regs[i].name, "%1x", i); |
296 | 298 | pdata->regs[i].attr.attr.mode = S_IRUGO | S_IWUSR; |
297 | 299 | pdata->regs[i].attr.attr.name = pdata->regs[i].name; |
drivers/rtc/rtc-rs5c348.c
... | ... | @@ -122,9 +122,12 @@ |
122 | 122 | tm->tm_min = bcd2bin(rxbuf[RS5C348_REG_MINS] & RS5C348_MINS_MASK); |
123 | 123 | tm->tm_hour = bcd2bin(rxbuf[RS5C348_REG_HOURS] & RS5C348_HOURS_MASK); |
124 | 124 | if (!pdata->rtc_24h) { |
125 | - tm->tm_hour %= 12; | |
126 | - if (rxbuf[RS5C348_REG_HOURS] & RS5C348_BIT_PM) | |
125 | + if (rxbuf[RS5C348_REG_HOURS] & RS5C348_BIT_PM) { | |
126 | + tm->tm_hour -= 20; | |
127 | + tm->tm_hour %= 12; | |
127 | 128 | tm->tm_hour += 12; |
129 | + } else | |
130 | + tm->tm_hour %= 12; | |
128 | 131 | } |
129 | 132 | tm->tm_wday = bcd2bin(rxbuf[RS5C348_REG_WDAY] & RS5C348_WDAY_MASK); |
130 | 133 | tm->tm_mday = bcd2bin(rxbuf[RS5C348_REG_DAY] & RS5C348_DAY_MASK); |
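The 12-hour fix above works because bcd2bin() of the raw hours register counts the PM bit as "20": subtracting 20, taking mod 12 (so 12 PM wraps to 0), then adding 12 yields the correct 24-hour value. A standalone check of that arithmetic; the mask and PM-bit values below are assumed for illustration (PM taken as bit 5 of the BCD hours register, which is what makes the -20 adjustment meaningful):

#include <stdio.h>

#define BIT_PM		0x20
#define HOURS_MASK	0x3f

static int bcd2bin(int v)	/* same conversion as the kernel helper */
{
	return (v & 0x0f) + (v >> 4) * 10;
}

static int decode_hour_12h(int reg)
{
	int hour = bcd2bin(reg & HOURS_MASK);

	if (reg & BIT_PM) {
		hour -= 20;	/* strip the PM bit that bcd2bin counted as "20" */
		hour %= 12;	/* 12 PM wraps to 0 ... */
		hour += 12;	/* ... then all PM hours get +12 */
	} else {
		hour %= 12;	/* 12 AM (midnight) becomes 0 */
	}
	return hour;
}

int main(void)
{
	/* 0x12 = 12 AM -> 0, 0x07 = 7 AM -> 7, 0x32 = 12 PM -> 12, 0x31 = 11 PM -> 23 */
	int regs[] = { 0x12, 0x07, 0x32, 0x31 };

	for (int i = 0; i < 4; i++)
		printf("reg 0x%02x -> %d\n", regs[i], decode_hour_12h(regs[i]));
	return 0;
}
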
include/linux/compaction.h
... | ... | @@ -22,7 +22,7 @@ |
22 | 22 | extern int fragmentation_index(struct zone *zone, unsigned int order); |
23 | 23 | extern unsigned long try_to_compact_pages(struct zonelist *zonelist, |
24 | 24 | int order, gfp_t gfp_mask, nodemask_t *mask, |
25 | - bool sync); | |
25 | + bool sync, bool *contended); | |
26 | 26 | extern int compact_pgdat(pg_data_t *pgdat, int order); |
27 | 27 | extern unsigned long compaction_suitable(struct zone *zone, int order); |
28 | 28 | |
... | ... | @@ -64,7 +64,7 @@ |
64 | 64 | #else |
65 | 65 | static inline unsigned long try_to_compact_pages(struct zonelist *zonelist, |
66 | 66 | int order, gfp_t gfp_mask, nodemask_t *nodemask, |
67 | - bool sync) | |
67 | + bool sync, bool *contended) | |
68 | 68 | { |
69 | 69 | return COMPACT_CONTINUE; |
70 | 70 | } |
include/linux/string.h
mm/compaction.c
... | ... | @@ -51,6 +51,47 @@ |
51 | 51 | } |
52 | 52 | |
53 | 53 | /* |
54 | + * Compaction requires the taking of some coarse locks that are potentially | |
55 | + * very heavily contended. Check if the process needs to be scheduled or | |
56 | + * if the lock is contended. For async compaction, back out in the event | |
57 | + * if contention is severe. For sync compaction, schedule. | |
58 | + * | |
59 | + * Returns true if the lock is held. | |
60 | + * Returns false if the lock is released and compaction should abort | |
61 | + */ | |
62 | +static bool compact_checklock_irqsave(spinlock_t *lock, unsigned long *flags, | |
63 | + bool locked, struct compact_control *cc) | |
64 | +{ | |
65 | + if (need_resched() || spin_is_contended(lock)) { | |
66 | + if (locked) { | |
67 | + spin_unlock_irqrestore(lock, *flags); | |
68 | + locked = false; | |
69 | + } | |
70 | + | |
71 | + /* async aborts if taking too long or contended */ | |
72 | + if (!cc->sync) { | |
73 | + if (cc->contended) | |
74 | + *cc->contended = true; | |
75 | + return false; | |
76 | + } | |
77 | + | |
78 | + cond_resched(); | |
79 | + if (fatal_signal_pending(current)) | |
80 | + return false; | |
81 | + } | |
82 | + | |
83 | + if (!locked) | |
84 | + spin_lock_irqsave(lock, *flags); | |
85 | + return true; | |
86 | +} | |
87 | + | |
88 | +static inline bool compact_trylock_irqsave(spinlock_t *lock, | |
89 | + unsigned long *flags, struct compact_control *cc) | |
90 | +{ | |
91 | + return compact_checklock_irqsave(lock, flags, false, cc); | |
92 | +} | |
93 | + | |
94 | +/* | |
54 | 95 | * Isolate free pages onto a private freelist. Caller must hold zone->lock. |
55 | 96 | * If @strict is true, will abort returning 0 on any invalid PFNs or non-free |
56 | 97 | * pages inside of the pageblock (even though it may still end up isolating |
... | ... | @@ -173,7 +214,7 @@ |
173 | 214 | } |
174 | 215 | |
175 | 216 | /* Update the number of anon and file isolated pages in the zone */ |
176 | -static void acct_isolated(struct zone *zone, struct compact_control *cc) | |
217 | +static void acct_isolated(struct zone *zone, bool locked, struct compact_control *cc) | |
177 | 218 | { |
178 | 219 | struct page *page; |
179 | 220 | unsigned int count[2] = { 0, }; |
... | ... | @@ -181,8 +222,14 @@ |
181 | 222 | list_for_each_entry(page, &cc->migratepages, lru) |
182 | 223 | count[!!page_is_file_cache(page)]++; |
183 | 224 | |
184 | - __mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]); | |
185 | - __mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]); | |
225 | + /* If locked we can use the interrupt unsafe versions */ | |
226 | + if (locked) { | |
227 | + __mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]); | |
228 | + __mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]); | |
229 | + } else { | |
230 | + mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]); | |
231 | + mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]); | |
232 | + } | |
186 | 233 | } |
187 | 234 | |
188 | 235 | /* Similar to reclaim, but different enough that they don't share logic */ |
... | ... | @@ -228,6 +275,8 @@ |
228 | 275 | struct list_head *migratelist = &cc->migratepages; |
229 | 276 | isolate_mode_t mode = 0; |
230 | 277 | struct lruvec *lruvec; |
278 | + unsigned long flags; | |
279 | + bool locked; | |
231 | 280 | |
232 | 281 | /* |
233 | 282 | * Ensure that there are not too many pages isolated from the LRU |
234 | 283 | |
235 | 284 | |
236 | 285 | |
237 | 286 | |
... | ... | @@ -247,26 +296,23 @@ |
247 | 296 | |
248 | 297 | /* Time to isolate some pages for migration */ |
249 | 298 | cond_resched(); |
250 | - spin_lock_irq(&zone->lru_lock); | |
299 | + spin_lock_irqsave(&zone->lru_lock, flags); | |
300 | + locked = true; | |
251 | 301 | for (; low_pfn < end_pfn; low_pfn++) { |
252 | 302 | struct page *page; |
253 | - bool locked = true; | |
254 | 303 | |
255 | 304 | /* give a chance to irqs before checking need_resched() */ |
256 | 305 | if (!((low_pfn+1) % SWAP_CLUSTER_MAX)) { |
257 | - spin_unlock_irq(&zone->lru_lock); | |
306 | + spin_unlock_irqrestore(&zone->lru_lock, flags); | |
258 | 307 | locked = false; |
259 | 308 | } |
260 | - if (need_resched() || spin_is_contended(&zone->lru_lock)) { | |
261 | - if (locked) | |
262 | - spin_unlock_irq(&zone->lru_lock); | |
263 | - cond_resched(); | |
264 | - spin_lock_irq(&zone->lru_lock); | |
265 | - if (fatal_signal_pending(current)) | |
266 | - break; | |
267 | - } else if (!locked) | |
268 | - spin_lock_irq(&zone->lru_lock); | |
269 | 309 | |
310 | + /* Check if it is ok to still hold the lock */ | |
311 | + locked = compact_checklock_irqsave(&zone->lru_lock, &flags, | |
312 | + locked, cc); | |
313 | + if (!locked) | |
314 | + break; | |
315 | + | |
270 | 316 | /* |
271 | 317 | * migrate_pfn does not necessarily start aligned to a |
272 | 318 | * pageblock. Ensure that pfn_valid is called when moving |
273 | 319 | |
... | ... | @@ -349,9 +395,10 @@ |
349 | 395 | } |
350 | 396 | } |
351 | 397 | |
352 | - acct_isolated(zone, cc); | |
398 | + acct_isolated(zone, locked, cc); | |
353 | 399 | |
354 | - spin_unlock_irq(&zone->lru_lock); | |
400 | + if (locked) | |
401 | + spin_unlock_irqrestore(&zone->lru_lock, flags); | |
355 | 402 | |
356 | 403 | trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated); |
357 | 404 | |
... | ... | @@ -384,6 +431,20 @@ |
384 | 431 | } |
385 | 432 | |
386 | 433 | /* |
434 | + * Returns the start pfn of the last page block in a zone. This is the starting | |
435 | + * point for full compaction of a zone. Compaction searches for free pages from | |
436 | + * the end of each zone, while isolate_freepages_block scans forward inside each | |
437 | + * page block. | |
438 | + */ | |
439 | +static unsigned long start_free_pfn(struct zone *zone) | |
440 | +{ | |
441 | + unsigned long free_pfn; | |
442 | + free_pfn = zone->zone_start_pfn + zone->spanned_pages; | |
443 | + free_pfn &= ~(pageblock_nr_pages-1); | |
444 | + return free_pfn; | |
445 | +} | |
446 | + | |
447 | +/* | |
387 | 448 | * Based on information in the current compact_control, find blocks |
388 | 449 | * suitable for isolating free pages from and then isolate them. |
389 | 450 | */ |
... | ... | @@ -422,17 +483,6 @@ |
422 | 483 | pfn -= pageblock_nr_pages) { |
423 | 484 | unsigned long isolated; |
424 | 485 | |
425 | - /* | |
426 | - * Skip ahead if another thread is compacting in the area | |
427 | - * simultaneously. If we wrapped around, we can only skip | |
428 | - * ahead if zone->compact_cached_free_pfn also wrapped to | |
429 | - * above our starting point. | |
430 | - */ | |
431 | - if (cc->order > 0 && (!cc->wrapped || | |
432 | - zone->compact_cached_free_pfn > | |
433 | - cc->start_free_pfn)) | |
434 | - pfn = min(pfn, zone->compact_cached_free_pfn); | |
435 | - | |
436 | 486 | if (!pfn_valid(pfn)) |
437 | 487 | continue; |
438 | 488 | |
... | ... | @@ -458,7 +508,16 @@ |
458 | 508 | * are disabled |
459 | 509 | */ |
460 | 510 | isolated = 0; |
461 | - spin_lock_irqsave(&zone->lock, flags); | |
511 | + | |
512 | + /* | |
513 | + * The zone lock must be held to isolate freepages. This | |
514 | + * unfortunately this is a very coarse lock and can be | |
515 | + * heavily contended if there are parallel allocations | |
516 | + * or parallel compactions. For async compaction do not | |
517 | + * spin on the lock | |
518 | + */ | |
519 | + if (!compact_trylock_irqsave(&zone->lock, &flags, cc)) | |
520 | + break; | |
462 | 521 | if (suitable_migration_target(page)) { |
463 | 522 | end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn); |
464 | 523 | isolated = isolate_freepages_block(pfn, end_pfn, |
... | ... | @@ -474,7 +533,15 @@ |
474 | 533 | */ |
475 | 534 | if (isolated) { |
476 | 535 | high_pfn = max(high_pfn, pfn); |
477 | - if (cc->order > 0) | |
536 | + | |
537 | + /* | |
538 | + * If the free scanner has wrapped, update | |
539 | + * compact_cached_free_pfn to point to the highest | |
540 | + * pageblock with free pages. This reduces excessive | |
541 | + * scanning of full pageblocks near the end of the | |
542 | + * zone | |
543 | + */ | |
544 | + if (cc->order > 0 && cc->wrapped) | |
478 | 545 | zone->compact_cached_free_pfn = high_pfn; |
479 | 546 | } |
480 | 547 | } |
... | ... | @@ -484,6 +551,11 @@ |
484 | 551 | |
485 | 552 | cc->free_pfn = high_pfn; |
486 | 553 | cc->nr_freepages = nr_freepages; |
554 | + | |
555 | + /* If compact_cached_free_pfn is reset then set it now */ | |
556 | + if (cc->order > 0 && !cc->wrapped && | |
557 | + zone->compact_cached_free_pfn == start_free_pfn(zone)) | |
558 | + zone->compact_cached_free_pfn = high_pfn; | |
487 | 559 | } |
488 | 560 | |
489 | 561 | /* |
... | ... | @@ -570,20 +642,6 @@ |
570 | 642 | return ISOLATE_SUCCESS; |
571 | 643 | } |
572 | 644 | |
573 | -/* | |
574 | - * Returns the start pfn of the last page block in a zone. This is the starting | |
575 | - * point for full compaction of a zone. Compaction searches for free pages from | |
576 | - * the end of each zone, while isolate_freepages_block scans forward inside each | |
577 | - * page block. | |
578 | - */ | |
579 | -static unsigned long start_free_pfn(struct zone *zone) | |
580 | -{ | |
581 | - unsigned long free_pfn; | |
582 | - free_pfn = zone->zone_start_pfn + zone->spanned_pages; | |
583 | - free_pfn &= ~(pageblock_nr_pages-1); | |
584 | - return free_pfn; | |
585 | -} | |
586 | - | |
587 | 645 | static int compact_finished(struct zone *zone, |
588 | 646 | struct compact_control *cc) |
589 | 647 | { |
... | ... | @@ -771,7 +829,7 @@ |
771 | 829 | |
772 | 830 | static unsigned long compact_zone_order(struct zone *zone, |
773 | 831 | int order, gfp_t gfp_mask, |
774 | - bool sync) | |
832 | + bool sync, bool *contended) | |
775 | 833 | { |
776 | 834 | struct compact_control cc = { |
777 | 835 | .nr_freepages = 0, |
... | ... | @@ -780,6 +838,7 @@ |
780 | 838 | .migratetype = allocflags_to_migratetype(gfp_mask), |
781 | 839 | .zone = zone, |
782 | 840 | .sync = sync, |
841 | + .contended = contended, | |
783 | 842 | }; |
784 | 843 | INIT_LIST_HEAD(&cc.freepages); |
785 | 844 | INIT_LIST_HEAD(&cc.migratepages); |
... | ... | @@ -801,7 +860,7 @@ |
801 | 860 | */ |
802 | 861 | unsigned long try_to_compact_pages(struct zonelist *zonelist, |
803 | 862 | int order, gfp_t gfp_mask, nodemask_t *nodemask, |
804 | - bool sync) | |
863 | + bool sync, bool *contended) | |
805 | 864 | { |
806 | 865 | enum zone_type high_zoneidx = gfp_zone(gfp_mask); |
807 | 866 | int may_enter_fs = gfp_mask & __GFP_FS; |
... | ... | @@ -825,7 +884,8 @@ |
825 | 884 | nodemask) { |
826 | 885 | int status; |
827 | 886 | |
828 | - status = compact_zone_order(zone, order, gfp_mask, sync); | |
887 | + status = compact_zone_order(zone, order, gfp_mask, sync, | |
888 | + contended); | |
829 | 889 | rc = max(status, rc); |
830 | 890 | |
831 | 891 | /* If a normal allocation would succeed, stop compacting */ |
... | ... | @@ -861,7 +921,7 @@ |
861 | 921 | if (cc->order > 0) { |
862 | 922 | int ok = zone_watermark_ok(zone, cc->order, |
863 | 923 | low_wmark_pages(zone), 0, 0); |
864 | - if (ok && cc->order > zone->compact_order_failed) | |
924 | + if (ok && cc->order >= zone->compact_order_failed) | |
865 | 925 | zone->compact_order_failed = cc->order + 1; |
866 | 926 | /* Currently async compaction is never deferred. */ |
867 | 927 | else if (!ok && cc->sync) |
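compact_checklock_irqsave() above encodes the policy "async compaction backs out when the lock is contended or a reschedule is due; sync compaction reschedules and takes the lock anyway". A rough userspace analogue of that trylock-or-bail policy, using pthreads rather than the kernel spinlock API (structure and names invented):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct ctl {
	bool sync;		/* mirrors cc->sync */
	bool contended;		/* mirrors *cc->contended */
};

static bool checklock(pthread_mutex_t *lock, struct ctl *cc)
{
	if (pthread_mutex_trylock(lock) == 0)
		return true;			/* got it cheaply */

	if (!cc->sync) {			/* async: record contention and abort */
		cc->contended = true;
		return false;
	}
	pthread_mutex_lock(lock);		/* sync: pay the price and continue */
	return true;
}

int main(void)
{
	pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
	struct ctl async = { .sync = false }, sync_cc = { .sync = true };

	pthread_mutex_lock(&lock);		/* simulate a contended lock */
	printf("async got lock: %d, contended flag: %d\n",
	       checklock(&lock, &async), async.contended);
	pthread_mutex_unlock(&lock);
	printf("sync got lock: %d\n", checklock(&lock, &sync_cc));
	pthread_mutex_unlock(&lock);
	return 0;
}
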
mm/internal.h
mm/mmap.c
... | ... | @@ -2309,7 +2309,7 @@ |
2309 | 2309 | } |
2310 | 2310 | vm_unacct_memory(nr_accounted); |
2311 | 2311 | |
2312 | - BUG_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT); | |
2312 | + WARN_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT); | |
2313 | 2313 | } |
2314 | 2314 | |
2315 | 2315 | /* Insert vm structure into process list sorted by address |
mm/page_alloc.c
... | ... | @@ -1928,6 +1928,17 @@ |
1928 | 1928 | zlc_active = 0; |
1929 | 1929 | goto zonelist_scan; |
1930 | 1930 | } |
1931 | + | |
1932 | + if (page) | |
1933 | + /* | |
1934 | + * page->pfmemalloc is set when ALLOC_NO_WATERMARKS was | |
1935 | + * necessary to allocate the page. The expectation is | |
1936 | + * that the caller is taking steps that will free more | |
1937 | + * memory. The caller should avoid the page being used | |
1938 | + * for !PFMEMALLOC purposes. | |
1939 | + */ | |
1940 | + page->pfmemalloc = !!(alloc_flags & ALLOC_NO_WATERMARKS); | |
1941 | + | |
1931 | 1942 | return page; |
1932 | 1943 | } |
1933 | 1944 | |
... | ... | @@ -2091,7 +2102,7 @@ |
2091 | 2102 | struct zonelist *zonelist, enum zone_type high_zoneidx, |
2092 | 2103 | nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, |
2093 | 2104 | int migratetype, bool sync_migration, |
2094 | - bool *deferred_compaction, | |
2105 | + bool *contended_compaction, bool *deferred_compaction, | |
2095 | 2106 | unsigned long *did_some_progress) |
2096 | 2107 | { |
2097 | 2108 | struct page *page; |
... | ... | @@ -2106,7 +2117,8 @@ |
2106 | 2117 | |
2107 | 2118 | current->flags |= PF_MEMALLOC; |
2108 | 2119 | *did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask, |
2109 | - nodemask, sync_migration); | |
2120 | + nodemask, sync_migration, | |
2121 | + contended_compaction); | |
2110 | 2122 | current->flags &= ~PF_MEMALLOC; |
2111 | 2123 | if (*did_some_progress != COMPACT_SKIPPED) { |
2112 | 2124 | |
... | ... | @@ -2152,7 +2164,7 @@ |
2152 | 2164 | struct zonelist *zonelist, enum zone_type high_zoneidx, |
2153 | 2165 | nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, |
2154 | 2166 | int migratetype, bool sync_migration, |
2155 | - bool *deferred_compaction, | |
2167 | + bool *contended_compaction, bool *deferred_compaction, | |
2156 | 2168 | unsigned long *did_some_progress) |
2157 | 2169 | { |
2158 | 2170 | return NULL; |
... | ... | @@ -2325,6 +2337,7 @@ |
2325 | 2337 | unsigned long did_some_progress; |
2326 | 2338 | bool sync_migration = false; |
2327 | 2339 | bool deferred_compaction = false; |
2340 | + bool contended_compaction = false; | |
2328 | 2341 | |
2329 | 2342 | /* |
2330 | 2343 | * In the slowpath, we sanity check order to avoid ever trying to |
... | ... | @@ -2389,14 +2402,6 @@ |
2389 | 2402 | zonelist, high_zoneidx, nodemask, |
2390 | 2403 | preferred_zone, migratetype); |
2391 | 2404 | if (page) { |
2392 | - /* | |
2393 | - * page->pfmemalloc is set when ALLOC_NO_WATERMARKS was | |
2394 | - * necessary to allocate the page. The expectation is | |
2395 | - * that the caller is taking steps that will free more | |
2396 | - * memory. The caller should avoid the page being used | |
2397 | - * for !PFMEMALLOC purposes. | |
2398 | - */ | |
2399 | - page->pfmemalloc = true; | |
2400 | 2405 | goto got_pg; |
2401 | 2406 | } |
2402 | 2407 | } |
... | ... | @@ -2422,6 +2427,7 @@ |
2422 | 2427 | nodemask, |
2423 | 2428 | alloc_flags, preferred_zone, |
2424 | 2429 | migratetype, sync_migration, |
2430 | + &contended_compaction, | |
2425 | 2431 | &deferred_compaction, |
2426 | 2432 | &did_some_progress); |
2427 | 2433 | if (page) |
2428 | 2434 | |
... | ... | @@ -2431,10 +2437,11 @@ |
2431 | 2437 | /* |
2432 | 2438 | * If compaction is deferred for high-order allocations, it is because |
2433 | 2439 | * sync compaction recently failed. In this is the case and the caller |
2434 | - * has requested the system not be heavily disrupted, fail the | |
2435 | - * allocation now instead of entering direct reclaim | |
2440 | + * requested a movable allocation that does not heavily disrupt the | |
2441 | + * system then fail the allocation instead of entering direct reclaim. | |
2436 | 2442 | */ |
2437 | - if (deferred_compaction && (gfp_mask & __GFP_NO_KSWAPD)) | |
2443 | + if ((deferred_compaction || contended_compaction) && | |
2444 | + (gfp_mask & __GFP_NO_KSWAPD)) | |
2438 | 2445 | goto nopage; |
2439 | 2446 | |
2440 | 2447 | /* Try direct reclaim and then allocating */ |
... | ... | @@ -2505,6 +2512,7 @@ |
2505 | 2512 | nodemask, |
2506 | 2513 | alloc_flags, preferred_zone, |
2507 | 2514 | migratetype, sync_migration, |
2515 | + &contended_compaction, | |
2508 | 2516 | &deferred_compaction, |
2509 | 2517 | &did_some_progress); |
2510 | 2518 | if (page) |
... | ... | @@ -2569,8 +2577,6 @@ |
2569 | 2577 | page = __alloc_pages_slowpath(gfp_mask, order, |
2570 | 2578 | zonelist, high_zoneidx, nodemask, |
2571 | 2579 | preferred_zone, migratetype); |
2572 | - else | |
2573 | - page->pfmemalloc = false; | |
2574 | 2580 | |
2575 | 2581 | trace_mm_page_alloc(page, order, gfp_mask, migratetype); |
2576 | 2582 |
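Moving the page->pfmemalloc assignment into get_page_from_freelist() means the flag is stamped on every successful allocation, fast path included, instead of being set in one slowpath branch and cleared in another; the deactivate_slab regression traced back to pages reaching slab with the flag not set correctly. A small illustration (plain C, invented structures, not kernel code) of stamping per-object state at the single point every allocation goes through:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct obj {
	bool emergency;		/* plays the role of page->pfmemalloc */
	char payload[64];
};

/* Every allocation funnels through here, so the flag is always valid. */
static struct obj *obj_alloc(bool emergency_reserves_used)
{
	struct obj *o = malloc(sizeof(*o));

	if (o)
		o->emergency = emergency_reserves_used;
	return o;
}

int main(void)
{
	struct obj *a = obj_alloc(false);
	struct obj *b = obj_alloc(true);

	if (a && b)
		printf("a: %d, b: %d\n", a->emergency, b->emergency);
	free(a);
	free(b);
	return 0;
}
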
scripts/checkpatch.pl
... | ... | @@ -3016,7 +3016,8 @@ |
3016 | 3016 | $herectx .= raw_line($linenr, $n) . "\n"; |
3017 | 3017 | } |
3018 | 3018 | |
3019 | - if (($stmts =~ tr/;/;/) == 1) { | |
3019 | + if (($stmts =~ tr/;/;/) == 1 && | |
3020 | + $stmts !~ /^\s*(if|while|for|switch)\b/) { | |
3020 | 3021 | WARN("SINGLE_STATEMENT_DO_WHILE_MACRO", |
3021 | 3022 | "Single statement macros should not use a do {} while (0) loop\n" . "$herectx"); |
3022 | 3023 | } |
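The extra condition above stops SINGLE_STATEMENT_DO_WHILE_MACRO from firing on macros whose single "statement" is itself a control statement, where the do { } while (0) wrapper is exactly what keeps a caller's else bound to the right if. A C illustration of the two cases (macro names made up for this example):

#include <stdio.h>

/* Still warned about: a single plain statement gains nothing from the
 * do/while wrapper; defining it as just printf("%d\n", (x)) would do. */
#define LOG_SIMPLE(x) do { printf("%d\n", (x)); } while (0)

/* No longer warned about: the wrapper makes the macro behave as one
 * statement, so the else below binds to the outer if as intended. */
#define LOG_POSITIVE(x) do { if ((x) > 0) printf("%d\n", (x)); } while (0)

int main(void)
{
	int v = -3;

	if (v > 100)
		LOG_POSITIVE(v);
	else			/* without do/while(0), this else could mis-bind */
		printf("not large\n");

	LOG_SIMPLE(v);
	return 0;
}
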