Commit 854ffa8d104e44111fec96764c0e0cb29223d54c
Committed by
Linus Torvalds
1 parent
4ffef5feff
Exists in
master
and in
4 other branches
memcg: improve performance in moving charge
Try to reduce overheads in moving charge by: - Instead of calling res_counter_uncharge() against the old cgroup in __mem_cgroup_move_account() every time, call res_counter_uncharge() at the end of task migration once. - removed css_get(&to->css) from __mem_cgroup_move_account() because callers should have already called css_get(). And removed css_put(&to->css) too, which was called by callers of move_account on success of move_account. - Instead of calling __mem_cgroup_try_charge(), i.e. res_counter_charge(), repeatedly, call res_counter_charge(PAGE_SIZE * count) in can_attach() if possible. - Instead of calling css_get()/css_put() repeatedly, make use of the coalesced __css_get()/__css_put() if possible. These changes reduce the overhead from 1.7sec to 0.6sec to move charges of 1G anonymous memory in my test environment. Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> Cc: Balbir Singh <balbir@linux.vnet.ibm.com> Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Li Zefan <lizf@cn.fujitsu.com> Cc: Paul Menage <menage@google.com> Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 1 changed file with 98 additions and 54 deletions Side-by-side Diff
mm/memcontrol.c
... | ... | @@ -253,6 +253,7 @@ |
253 | 253 | struct mem_cgroup *from; |
254 | 254 | struct mem_cgroup *to; |
255 | 255 | unsigned long precharge; |
256 | + unsigned long moved_charge; | |
256 | 257 | } mc; |
257 | 258 | |
258 | 259 | /* |
259 | 260 | |
260 | 261 | |
261 | 262 | |
262 | 263 | |
... | ... | @@ -1536,16 +1537,25 @@ |
1536 | 1537 | * This function is for that and do uncharge, put css's refcnt. |
1537 | 1538 | * gotten by try_charge(). |
1538 | 1539 | */ |
1539 | -static void mem_cgroup_cancel_charge(struct mem_cgroup *mem) | |
1540 | +static void __mem_cgroup_cancel_charge(struct mem_cgroup *mem, | |
1541 | + unsigned long count) | |
1540 | 1542 | { |
1541 | 1543 | if (!mem_cgroup_is_root(mem)) { |
1542 | - res_counter_uncharge(&mem->res, PAGE_SIZE); | |
1544 | + res_counter_uncharge(&mem->res, PAGE_SIZE * count); | |
1543 | 1545 | if (do_swap_account) |
1544 | - res_counter_uncharge(&mem->memsw, PAGE_SIZE); | |
1546 | + res_counter_uncharge(&mem->memsw, PAGE_SIZE * count); | |
1547 | + VM_BUG_ON(test_bit(CSS_ROOT, &mem->css.flags)); | |
1548 | + WARN_ON_ONCE(count > INT_MAX); | |
1549 | + __css_put(&mem->css, (int)count); | |
1545 | 1550 | } |
1546 | - css_put(&mem->css); | |
1551 | + /* we don't need css_put for root */ | |
1547 | 1552 | } |
1548 | 1553 | |
1554 | +static void mem_cgroup_cancel_charge(struct mem_cgroup *mem) | |
1555 | +{ | |
1556 | + __mem_cgroup_cancel_charge(mem, 1); | |
1557 | +} | |
1558 | + | |
1549 | 1559 | /* |
1550 | 1560 | * A helper function to get mem_cgroup from ID. must be called under |
1551 | 1561 | * rcu_read_lock(). The caller must check css_is_removed() or some if |
1552 | 1562 | |
1553 | 1563 | |
... | ... | @@ -1646,17 +1656,20 @@ |
1646 | 1656 | * @pc: page_cgroup of the page. |
1647 | 1657 | * @from: mem_cgroup which the page is moved from. |
1648 | 1658 | * @to: mem_cgroup which the page is moved to. @from != @to. |
1659 | + * @uncharge: whether we should call uncharge and css_put against @from. | |
1649 | 1660 | * |
1650 | 1661 | * The caller must confirm following. |
1651 | 1662 | * - page is not on LRU (isolate_page() is useful.) |
1652 | 1663 | * - the pc is locked, used, and ->mem_cgroup points to @from. |
1653 | 1664 | * |
1654 | - * This function does "uncharge" from old cgroup but doesn't do "charge" to | |
1655 | - * new cgroup. It should be done by a caller. | |
1665 | + * This function doesn't do "charge" nor css_get to new cgroup. It should be | |
1666 | + * done by a caller(__mem_cgroup_try_charge would be usefull). If @uncharge is | |
1667 | + * true, this function does "uncharge" from old cgroup, but it doesn't if | |
1668 | + * @uncharge is false, so a caller should do "uncharge". | |
1656 | 1669 | */ |
1657 | 1670 | |
1658 | 1671 | static void __mem_cgroup_move_account(struct page_cgroup *pc, |
1659 | - struct mem_cgroup *from, struct mem_cgroup *to) | |
1672 | + struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge) | |
1660 | 1673 | { |
1661 | 1674 | struct page *page; |
1662 | 1675 | int cpu; |
... | ... | @@ -1669,10 +1682,6 @@ |
1669 | 1682 | VM_BUG_ON(!PageCgroupUsed(pc)); |
1670 | 1683 | VM_BUG_ON(pc->mem_cgroup != from); |
1671 | 1684 | |
1672 | - if (!mem_cgroup_is_root(from)) | |
1673 | - res_counter_uncharge(&from->res, PAGE_SIZE); | |
1674 | - mem_cgroup_charge_statistics(from, pc, false); | |
1675 | - | |
1676 | 1685 | page = pc->page; |
1677 | 1686 | if (page_mapped(page) && !PageAnon(page)) { |
1678 | 1687 | cpu = smp_processor_id(); |
1679 | 1688 | |
... | ... | @@ -1688,12 +1697,12 @@ |
1688 | 1697 | __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_FILE_MAPPED, |
1689 | 1698 | 1); |
1690 | 1699 | } |
1700 | + mem_cgroup_charge_statistics(from, pc, false); | |
1701 | + if (uncharge) | |
1702 | + /* This is not "cancel", but cancel_charge does all we need. */ | |
1703 | + mem_cgroup_cancel_charge(from); | |
1691 | 1704 | |
1692 | - if (do_swap_account && !mem_cgroup_is_root(from)) | |
1693 | - res_counter_uncharge(&from->memsw, PAGE_SIZE); | |
1694 | - css_put(&from->css); | |
1695 | - | |
1696 | - css_get(&to->css); | |
1705 | + /* caller should have done css_get */ | |
1697 | 1706 | pc->mem_cgroup = to; |
1698 | 1707 | mem_cgroup_charge_statistics(to, pc, true); |
1699 | 1708 | /* |
1700 | 1709 | |
... | ... | @@ -1710,12 +1719,12 @@ |
1710 | 1719 | * __mem_cgroup_move_account() |
1711 | 1720 | */ |
1712 | 1721 | static int mem_cgroup_move_account(struct page_cgroup *pc, |
1713 | - struct mem_cgroup *from, struct mem_cgroup *to) | |
1722 | + struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge) | |
1714 | 1723 | { |
1715 | 1724 | int ret = -EINVAL; |
1716 | 1725 | lock_page_cgroup(pc); |
1717 | 1726 | if (PageCgroupUsed(pc) && pc->mem_cgroup == from) { |
1718 | - __mem_cgroup_move_account(pc, from, to); | |
1727 | + __mem_cgroup_move_account(pc, from, to, uncharge); | |
1719 | 1728 | ret = 0; |
1720 | 1729 | } |
1721 | 1730 | unlock_page_cgroup(pc); |
... | ... | @@ -1751,11 +1760,9 @@ |
1751 | 1760 | if (ret || !parent) |
1752 | 1761 | goto put_back; |
1753 | 1762 | |
1754 | - ret = mem_cgroup_move_account(pc, child, parent); | |
1755 | - if (!ret) | |
1756 | - css_put(&parent->css); /* drop extra refcnt by try_charge() */ | |
1757 | - else | |
1758 | - mem_cgroup_cancel_charge(parent); /* does css_put */ | |
1763 | + ret = mem_cgroup_move_account(pc, child, parent, true); | |
1764 | + if (ret) | |
1765 | + mem_cgroup_cancel_charge(parent); | |
1759 | 1766 | put_back: |
1760 | 1767 | putback_lru_page(page); |
1761 | 1768 | put: |
1762 | 1769 | |
1763 | 1770 | |
... | ... | @@ -3438,16 +3445,58 @@ |
3438 | 3445 | } |
3439 | 3446 | |
3440 | 3447 | /* Handlers for move charge at task migration. */ |
3441 | -static int mem_cgroup_do_precharge(void) | |
3448 | +#define PRECHARGE_COUNT_AT_ONCE 256 | |
3449 | +static int mem_cgroup_do_precharge(unsigned long count) | |
3442 | 3450 | { |
3443 | - int ret = -ENOMEM; | |
3451 | + int ret = 0; | |
3452 | + int batch_count = PRECHARGE_COUNT_AT_ONCE; | |
3444 | 3453 | struct mem_cgroup *mem = mc.to; |
3445 | 3454 | |
3446 | - ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, false, NULL); | |
3447 | - if (ret || !mem) | |
3448 | - return -ENOMEM; | |
3449 | - | |
3450 | - mc.precharge++; | |
3455 | + if (mem_cgroup_is_root(mem)) { | |
3456 | + mc.precharge += count; | |
3457 | + /* we don't need css_get for root */ | |
3458 | + return ret; | |
3459 | + } | |
3460 | + /* try to charge at once */ | |
3461 | + if (count > 1) { | |
3462 | + struct res_counter *dummy; | |
3463 | + /* | |
3464 | + * "mem" cannot be under rmdir() because we've already checked | |
3465 | + * by cgroup_lock_live_cgroup() that it is not removed and we | |
3466 | + * are still under the same cgroup_mutex. So we can postpone | |
3467 | + * css_get(). | |
3468 | + */ | |
3469 | + if (res_counter_charge(&mem->res, PAGE_SIZE * count, &dummy)) | |
3470 | + goto one_by_one; | |
3471 | + if (do_swap_account && res_counter_charge(&mem->memsw, | |
3472 | + PAGE_SIZE * count, &dummy)) { | |
3473 | + res_counter_uncharge(&mem->res, PAGE_SIZE * count); | |
3474 | + goto one_by_one; | |
3475 | + } | |
3476 | + mc.precharge += count; | |
3477 | + VM_BUG_ON(test_bit(CSS_ROOT, &mem->css.flags)); | |
3478 | + WARN_ON_ONCE(count > INT_MAX); | |
3479 | + __css_get(&mem->css, (int)count); | |
3480 | + return ret; | |
3481 | + } | |
3482 | +one_by_one: | |
3483 | + /* fall back to one by one charge */ | |
3484 | + while (count--) { | |
3485 | + if (signal_pending(current)) { | |
3486 | + ret = -EINTR; | |
3487 | + break; | |
3488 | + } | |
3489 | + if (!batch_count--) { | |
3490 | + batch_count = PRECHARGE_COUNT_AT_ONCE; | |
3491 | + cond_resched(); | |
3492 | + } | |
3493 | + ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, | |
3494 | + false, NULL); | |
3495 | + if (ret || !mem) | |
3496 | + /* mem_cgroup_clear_mc() will do uncharge later */ | |
3497 | + return -ENOMEM; | |
3498 | + mc.precharge++; | |
3499 | + } | |
3451 | 3500 | return ret; |
3452 | 3501 | } |
3453 | 3502 | |
3454 | 3503 | |
3455 | 3504 | |
3456 | 3505 | |
... | ... | @@ -3570,35 +3619,26 @@ |
3570 | 3619 | return precharge; |
3571 | 3620 | } |
3572 | 3621 | |
3573 | -#define PRECHARGE_AT_ONCE 256 | |
3574 | 3622 | static int mem_cgroup_precharge_mc(struct mm_struct *mm) |
3575 | 3623 | { |
3576 | - int ret = 0; | |
3577 | - int count = PRECHARGE_AT_ONCE; | |
3578 | - unsigned long precharge = mem_cgroup_count_precharge(mm); | |
3579 | - | |
3580 | - while (!ret && precharge--) { | |
3581 | - if (signal_pending(current)) { | |
3582 | - ret = -EINTR; | |
3583 | - break; | |
3584 | - } | |
3585 | - if (!count--) { | |
3586 | - count = PRECHARGE_AT_ONCE; | |
3587 | - cond_resched(); | |
3588 | - } | |
3589 | - ret = mem_cgroup_do_precharge(); | |
3590 | - } | |
3591 | - | |
3592 | - return ret; | |
3624 | + return mem_cgroup_do_precharge(mem_cgroup_count_precharge(mm)); | |
3593 | 3625 | } |
3594 | 3626 | |
3595 | 3627 | static void mem_cgroup_clear_mc(void) |
3596 | 3628 | { |
3597 | 3629 | /* we must uncharge all the leftover precharges from mc.to */ |
3598 | - while (mc.precharge) { | |
3599 | - mem_cgroup_cancel_charge(mc.to); | |
3600 | - mc.precharge--; | |
3630 | + if (mc.precharge) { | |
3631 | + __mem_cgroup_cancel_charge(mc.to, mc.precharge); | |
3632 | + mc.precharge = 0; | |
3601 | 3633 | } |
3634 | + /* | |
3635 | + * we didn't uncharge from mc.from at mem_cgroup_move_account(), so | |
3636 | + * we must uncharge here. | |
3637 | + */ | |
3638 | + if (mc.moved_charge) { | |
3639 | + __mem_cgroup_cancel_charge(mc.from, mc.moved_charge); | |
3640 | + mc.moved_charge = 0; | |
3641 | + } | |
3602 | 3642 | mc.from = NULL; |
3603 | 3643 | mc.to = NULL; |
3604 | 3644 | } |
3605 | 3645 | |
... | ... | @@ -3625,9 +3665,11 @@ |
3625 | 3665 | VM_BUG_ON(mc.from); |
3626 | 3666 | VM_BUG_ON(mc.to); |
3627 | 3667 | VM_BUG_ON(mc.precharge); |
3668 | + VM_BUG_ON(mc.moved_charge); | |
3628 | 3669 | mc.from = from; |
3629 | 3670 | mc.to = mem; |
3630 | 3671 | mc.precharge = 0; |
3672 | + mc.moved_charge = 0; | |
3631 | 3673 | |
3632 | 3674 | ret = mem_cgroup_precharge_mc(mm); |
3633 | 3675 | if (ret) |
3634 | 3676 | |
... | ... | @@ -3674,9 +3716,11 @@ |
3674 | 3716 | if (isolate_lru_page(page)) |
3675 | 3717 | goto put; |
3676 | 3718 | pc = lookup_page_cgroup(page); |
3677 | - if (!mem_cgroup_move_account(pc, mc.from, mc.to)) { | |
3678 | - css_put(&mc.to->css); | |
3719 | + if (!mem_cgroup_move_account(pc, | |
3720 | + mc.from, mc.to, false)) { | |
3679 | 3721 | mc.precharge--; |
3722 | + /* we uncharge from mc.from later. */ | |
3723 | + mc.moved_charge++; | |
3680 | 3724 | } |
3681 | 3725 | putback_lru_page(page); |
3682 | 3726 | put: /* is_target_pte_for_mc() gets the page */ |
... | ... | @@ -3696,7 +3740,7 @@ |
3696 | 3740 | * charges to mc.to if we have failed in charge once in attach() |
3697 | 3741 | * phase. |
3698 | 3742 | */ |
3699 | - ret = mem_cgroup_do_precharge(); | |
3743 | + ret = mem_cgroup_do_precharge(1); | |
3700 | 3744 | if (!ret) |
3701 | 3745 | goto retry; |
3702 | 3746 | } |