Commit 854ffa8d104e44111fec96764c0e0cb29223d54c

Authored by Daisuke Nishimura
Committed by Linus Torvalds
1 parent 4ffef5feff

memcg: improve performance in moving charge

Try to reduce overheads in moving charge by:

- Instead of calling res_counter_uncharge() against the old cgroup in
  __mem_cgroup_move_account() every time, call res_counter_uncharge() at the end
  of task migration once.
- removed css_get(&to->css) from __mem_cgroup_move_account() because callers
  should have already called css_get(). And removed css_put(&to->css) too,
  which was called by callers of move_account on success of move_account.
- Instead of calling __mem_cgroup_try_charge(), i.e. res_counter_charge(),
  repeatedly, call res_counter_charge(PAGE_SIZE * count) in can_attach() if
  possible.
- Instead of calling css_get()/css_put() repeatedly, make use of the coalesced
  __css_get()/__css_put() if possible.

These changes reduce the overhead from 1.7sec to 0.6sec to move charges
of 1G anonymous memory in my test environment.

Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Paul Menage <menage@google.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 1 changed file with 98 additions and 54 deletions Side-by-side Diff

... ... @@ -253,6 +253,7 @@
253 253 struct mem_cgroup *from;
254 254 struct mem_cgroup *to;
255 255 unsigned long precharge;
  256 + unsigned long moved_charge;
256 257 } mc;
257 258  
258 259 /*
259 260  
260 261  
261 262  
262 263  
... ... @@ -1536,16 +1537,25 @@
1536 1537 * This function is for that and do uncharge, put css's refcnt.
1537 1538 * gotten by try_charge().
1538 1539 */
1539   -static void mem_cgroup_cancel_charge(struct mem_cgroup *mem)
  1540 +static void __mem_cgroup_cancel_charge(struct mem_cgroup *mem,
  1541 + unsigned long count)
1540 1542 {
1541 1543 if (!mem_cgroup_is_root(mem)) {
1542   - res_counter_uncharge(&mem->res, PAGE_SIZE);
  1544 + res_counter_uncharge(&mem->res, PAGE_SIZE * count);
1543 1545 if (do_swap_account)
1544   - res_counter_uncharge(&mem->memsw, PAGE_SIZE);
  1546 + res_counter_uncharge(&mem->memsw, PAGE_SIZE * count);
  1547 + VM_BUG_ON(test_bit(CSS_ROOT, &mem->css.flags));
  1548 + WARN_ON_ONCE(count > INT_MAX);
  1549 + __css_put(&mem->css, (int)count);
1545 1550 }
1546   - css_put(&mem->css);
  1551 + /* we don't need css_put for root */
1547 1552 }
1548 1553  
  1554 +static void mem_cgroup_cancel_charge(struct mem_cgroup *mem)
  1555 +{
  1556 + __mem_cgroup_cancel_charge(mem, 1);
  1557 +}
  1558 +
1549 1559 /*
1550 1560 * A helper function to get mem_cgroup from ID. must be called under
1551 1561 * rcu_read_lock(). The caller must check css_is_removed() or some if
1552 1562  
1553 1563  
... ... @@ -1646,17 +1656,20 @@
1646 1656 * @pc: page_cgroup of the page.
1647 1657 * @from: mem_cgroup which the page is moved from.
1648 1658 * @to: mem_cgroup which the page is moved to. @from != @to.
  1659 + * @uncharge: whether we should call uncharge and css_put against @from.
1649 1660 *
1650 1661 * The caller must confirm following.
1651 1662 * - page is not on LRU (isolate_page() is useful.)
1652 1663 * - the pc is locked, used, and ->mem_cgroup points to @from.
1653 1664 *
1654   - * This function does "uncharge" from old cgroup but doesn't do "charge" to
1655   - * new cgroup. It should be done by a caller.
  1665 + * This function doesn't do "charge" nor css_get to new cgroup. It should be
  1666 + * done by a caller(__mem_cgroup_try_charge would be usefull). If @uncharge is
  1667 + * true, this function does "uncharge" from old cgroup, but it doesn't if
  1668 + * @uncharge is false, so a caller should do "uncharge".
1656 1669 */
1657 1670  
1658 1671 static void __mem_cgroup_move_account(struct page_cgroup *pc,
1659   - struct mem_cgroup *from, struct mem_cgroup *to)
  1672 + struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge)
1660 1673 {
1661 1674 struct page *page;
1662 1675 int cpu;
... ... @@ -1669,10 +1682,6 @@
1669 1682 VM_BUG_ON(!PageCgroupUsed(pc));
1670 1683 VM_BUG_ON(pc->mem_cgroup != from);
1671 1684  
1672   - if (!mem_cgroup_is_root(from))
1673   - res_counter_uncharge(&from->res, PAGE_SIZE);
1674   - mem_cgroup_charge_statistics(from, pc, false);
1675   -
1676 1685 page = pc->page;
1677 1686 if (page_mapped(page) && !PageAnon(page)) {
1678 1687 cpu = smp_processor_id();
1679 1688  
... ... @@ -1688,12 +1697,12 @@
1688 1697 __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_FILE_MAPPED,
1689 1698 1);
1690 1699 }
  1700 + mem_cgroup_charge_statistics(from, pc, false);
  1701 + if (uncharge)
  1702 + /* This is not "cancel", but cancel_charge does all we need. */
  1703 + mem_cgroup_cancel_charge(from);
1691 1704  
1692   - if (do_swap_account && !mem_cgroup_is_root(from))
1693   - res_counter_uncharge(&from->memsw, PAGE_SIZE);
1694   - css_put(&from->css);
1695   -
1696   - css_get(&to->css);
  1705 + /* caller should have done css_get */
1697 1706 pc->mem_cgroup = to;
1698 1707 mem_cgroup_charge_statistics(to, pc, true);
1699 1708 /*
1700 1709  
... ... @@ -1710,12 +1719,12 @@
1710 1719 * __mem_cgroup_move_account()
1711 1720 */
1712 1721 static int mem_cgroup_move_account(struct page_cgroup *pc,
1713   - struct mem_cgroup *from, struct mem_cgroup *to)
  1722 + struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge)
1714 1723 {
1715 1724 int ret = -EINVAL;
1716 1725 lock_page_cgroup(pc);
1717 1726 if (PageCgroupUsed(pc) && pc->mem_cgroup == from) {
1718   - __mem_cgroup_move_account(pc, from, to);
  1727 + __mem_cgroup_move_account(pc, from, to, uncharge);
1719 1728 ret = 0;
1720 1729 }
1721 1730 unlock_page_cgroup(pc);
... ... @@ -1751,11 +1760,9 @@
1751 1760 if (ret || !parent)
1752 1761 goto put_back;
1753 1762  
1754   - ret = mem_cgroup_move_account(pc, child, parent);
1755   - if (!ret)
1756   - css_put(&parent->css); /* drop extra refcnt by try_charge() */
1757   - else
1758   - mem_cgroup_cancel_charge(parent); /* does css_put */
  1763 + ret = mem_cgroup_move_account(pc, child, parent, true);
  1764 + if (ret)
  1765 + mem_cgroup_cancel_charge(parent);
1759 1766 put_back:
1760 1767 putback_lru_page(page);
1761 1768 put:
1762 1769  
1763 1770  
... ... @@ -3438,16 +3445,58 @@
3438 3445 }
3439 3446  
3440 3447 /* Handlers for move charge at task migration. */
3441   -static int mem_cgroup_do_precharge(void)
  3448 +#define PRECHARGE_COUNT_AT_ONCE 256
  3449 +static int mem_cgroup_do_precharge(unsigned long count)
3442 3450 {
3443   - int ret = -ENOMEM;
  3451 + int ret = 0;
  3452 + int batch_count = PRECHARGE_COUNT_AT_ONCE;
3444 3453 struct mem_cgroup *mem = mc.to;
3445 3454  
3446   - ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, false, NULL);
3447   - if (ret || !mem)
3448   - return -ENOMEM;
3449   -
3450   - mc.precharge++;
  3455 + if (mem_cgroup_is_root(mem)) {
  3456 + mc.precharge += count;
  3457 + /* we don't need css_get for root */
  3458 + return ret;
  3459 + }
  3460 + /* try to charge at once */
  3461 + if (count > 1) {
  3462 + struct res_counter *dummy;
  3463 + /*
  3464 + * "mem" cannot be under rmdir() because we've already checked
  3465 + * by cgroup_lock_live_cgroup() that it is not removed and we
  3466 + * are still under the same cgroup_mutex. So we can postpone
  3467 + * css_get().
  3468 + */
  3469 + if (res_counter_charge(&mem->res, PAGE_SIZE * count, &dummy))
  3470 + goto one_by_one;
  3471 + if (do_swap_account && res_counter_charge(&mem->memsw,
  3472 + PAGE_SIZE * count, &dummy)) {
  3473 + res_counter_uncharge(&mem->res, PAGE_SIZE * count);
  3474 + goto one_by_one;
  3475 + }
  3476 + mc.precharge += count;
  3477 + VM_BUG_ON(test_bit(CSS_ROOT, &mem->css.flags));
  3478 + WARN_ON_ONCE(count > INT_MAX);
  3479 + __css_get(&mem->css, (int)count);
  3480 + return ret;
  3481 + }
  3482 +one_by_one:
  3483 + /* fall back to one by one charge */
  3484 + while (count--) {
  3485 + if (signal_pending(current)) {
  3486 + ret = -EINTR;
  3487 + break;
  3488 + }
  3489 + if (!batch_count--) {
  3490 + batch_count = PRECHARGE_COUNT_AT_ONCE;
  3491 + cond_resched();
  3492 + }
  3493 + ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem,
  3494 + false, NULL);
  3495 + if (ret || !mem)
  3496 + /* mem_cgroup_clear_mc() will do uncharge later */
  3497 + return -ENOMEM;
  3498 + mc.precharge++;
  3499 + }
3451 3500 return ret;
3452 3501 }
3453 3502  
3454 3503  
3455 3504  
3456 3505  
... ... @@ -3570,35 +3619,26 @@
3570 3619 return precharge;
3571 3620 }
3572 3621  
3573   -#define PRECHARGE_AT_ONCE 256
3574 3622 static int mem_cgroup_precharge_mc(struct mm_struct *mm)
3575 3623 {
3576   - int ret = 0;
3577   - int count = PRECHARGE_AT_ONCE;
3578   - unsigned long precharge = mem_cgroup_count_precharge(mm);
3579   -
3580   - while (!ret && precharge--) {
3581   - if (signal_pending(current)) {
3582   - ret = -EINTR;
3583   - break;
3584   - }
3585   - if (!count--) {
3586   - count = PRECHARGE_AT_ONCE;
3587   - cond_resched();
3588   - }
3589   - ret = mem_cgroup_do_precharge();
3590   - }
3591   -
3592   - return ret;
  3624 + return mem_cgroup_do_precharge(mem_cgroup_count_precharge(mm));
3593 3625 }
3594 3626  
3595 3627 static void mem_cgroup_clear_mc(void)
3596 3628 {
3597 3629 /* we must uncharge all the leftover precharges from mc.to */
3598   - while (mc.precharge) {
3599   - mem_cgroup_cancel_charge(mc.to);
3600   - mc.precharge--;
  3630 + if (mc.precharge) {
  3631 + __mem_cgroup_cancel_charge(mc.to, mc.precharge);
  3632 + mc.precharge = 0;
3601 3633 }
  3634 + /*
  3635 + * we didn't uncharge from mc.from at mem_cgroup_move_account(), so
  3636 + * we must uncharge here.
  3637 + */
  3638 + if (mc.moved_charge) {
  3639 + __mem_cgroup_cancel_charge(mc.from, mc.moved_charge);
  3640 + mc.moved_charge = 0;
  3641 + }
3602 3642 mc.from = NULL;
3603 3643 mc.to = NULL;
3604 3644 }
3605 3645  
... ... @@ -3625,9 +3665,11 @@
3625 3665 VM_BUG_ON(mc.from);
3626 3666 VM_BUG_ON(mc.to);
3627 3667 VM_BUG_ON(mc.precharge);
  3668 + VM_BUG_ON(mc.moved_charge);
3628 3669 mc.from = from;
3629 3670 mc.to = mem;
3630 3671 mc.precharge = 0;
  3672 + mc.moved_charge = 0;
3631 3673  
3632 3674 ret = mem_cgroup_precharge_mc(mm);
3633 3675 if (ret)
3634 3676  
... ... @@ -3674,9 +3716,11 @@
3674 3716 if (isolate_lru_page(page))
3675 3717 goto put;
3676 3718 pc = lookup_page_cgroup(page);
3677   - if (!mem_cgroup_move_account(pc, mc.from, mc.to)) {
3678   - css_put(&mc.to->css);
  3719 + if (!mem_cgroup_move_account(pc,
  3720 + mc.from, mc.to, false)) {
3679 3721 mc.precharge--;
  3722 + /* we uncharge from mc.from later. */
  3723 + mc.moved_charge++;
3680 3724 }
3681 3725 putback_lru_page(page);
3682 3726 put: /* is_target_pte_for_mc() gets the page */
... ... @@ -3696,7 +3740,7 @@
3696 3740 * charges to mc.to if we have failed in charge once in attach()
3697 3741 * phase.
3698 3742 */
3699   - ret = mem_cgroup_do_precharge();
  3743 + ret = mem_cgroup_do_precharge(1);
3700 3744 if (!ret)
3701 3745 goto retry;
3702 3746 }