Commit 6d1fdc48938cd51a3964778d78f27cb26c8eb55d
Committed by
Linus Torvalds
1 parent
b6b6cc72bc
Exists in
master
and in
13 other branches
memcg: sanitize __mem_cgroup_try_charge() call protocol
Some callsites pass a memcg directly, some callsites pass an mm that then has to be translated to a memcg. This makes for a terrible function interface. Just push the mm-to-memcg translation into the respective callsites and always pass a memcg to mem_cgroup_try_charge(). [mhocko@suse.cz: add charge mm helper] Signed-off-by: Johannes Weiner <hannes@cmpxchg.org> Acked-by: Michal Hocko <mhocko@suse.cz> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 1 changed file with 102 additions and 105 deletions Side-by-side Diff
mm/memcontrol.c
... | ... | @@ -2575,7 +2575,7 @@ |
2575 | 2575 | } |
2576 | 2576 | |
2577 | 2577 | |
2578 | -/* See __mem_cgroup_try_charge() for details */ | |
2578 | +/* See mem_cgroup_try_charge() for details */ | |
2579 | 2579 | enum { |
2580 | 2580 | CHARGE_OK, /* success */ |
2581 | 2581 | CHARGE_RETRY, /* need to retry but retry is not bad */ |
2582 | 2582 | |
2583 | 2583 | |
2584 | 2584 | |
2585 | 2585 | |
2586 | 2586 | |
2587 | 2587 | |
... | ... | @@ -2648,45 +2648,34 @@ |
2648 | 2648 | return CHARGE_NOMEM; |
2649 | 2649 | } |
2650 | 2650 | |
2651 | -/* | |
2652 | - * __mem_cgroup_try_charge() does | |
2653 | - * 1. detect memcg to be charged against from passed *mm and *ptr, | |
2654 | - * 2. update res_counter | |
2655 | - * 3. call memory reclaim if necessary. | |
2651 | +/** | |
2652 | + * mem_cgroup_try_charge - try charging a memcg | |
2653 | + * @memcg: memcg to charge | |
2654 | + * @nr_pages: number of pages to charge | |
2655 | + * @oom: trigger OOM if reclaim fails | |
2656 | 2656 | * |
2657 | - * In some special case, if the task is fatal, fatal_signal_pending() or | |
2658 | - * has TIF_MEMDIE, this function returns -EINTR while writing root_mem_cgroup | |
2659 | - * to *ptr. There are two reasons for this. 1: fatal threads should quit as soon | |
2660 | - * as possible without any hazards. 2: all pages should have a valid | |
2661 | - * pc->mem_cgroup. If mm is NULL and the caller doesn't pass a valid memcg | |
2662 | - * pointer, that is treated as a charge to root_mem_cgroup. | |
2663 | - * | |
2664 | - * So __mem_cgroup_try_charge() will return | |
2665 | - * 0 ... on success, filling *ptr with a valid memcg pointer. | |
2666 | - * -ENOMEM ... charge failure because of resource limits. | |
2667 | - * -EINTR ... if thread is fatal. *ptr is filled with root_mem_cgroup. | |
2668 | - * | |
2669 | - * Unlike the exported interface, an "oom" parameter is added. if oom==true, | |
2670 | - * the oom-killer can be invoked. | |
2657 | + * Returns 0 if @memcg was charged successfully, -EINTR if the charge | |
2658 | + * was bypassed to root_mem_cgroup, and -ENOMEM if the charge failed. | |
2671 | 2659 | */ |
2672 | -static int __mem_cgroup_try_charge(struct mm_struct *mm, | |
2673 | - gfp_t gfp_mask, | |
2674 | - unsigned int nr_pages, | |
2675 | - struct mem_cgroup **ptr, | |
2676 | - bool oom) | |
2660 | +static int mem_cgroup_try_charge(struct mem_cgroup *memcg, | |
2661 | + gfp_t gfp_mask, | |
2662 | + unsigned int nr_pages, | |
2663 | + bool oom) | |
2677 | 2664 | { |
2678 | 2665 | unsigned int batch = max(CHARGE_BATCH, nr_pages); |
2679 | 2666 | int nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES; |
2680 | - struct mem_cgroup *memcg = NULL; | |
2681 | 2667 | int ret; |
2682 | 2668 | |
2669 | + if (mem_cgroup_is_root(memcg)) | |
2670 | + goto done; | |
2683 | 2671 | /* |
2684 | - * Unlike gloval-vm's OOM-kill, we're not in memory shortage | |
2685 | - * in system level. So, allow to go ahead dying process in addition to | |
2686 | - * MEMDIE process. | |
2672 | + * Unlike in global OOM situations, memcg is not in a physical | |
2673 | + * memory shortage. Allow dying and OOM-killed tasks to | |
2674 | + * bypass the last charges so that they can exit quickly and | |
2675 | + * free their memory. | |
2687 | 2676 | */ |
2688 | - if (unlikely(test_thread_flag(TIF_MEMDIE) | |
2689 | - || fatal_signal_pending(current))) | |
2677 | + if (unlikely(test_thread_flag(TIF_MEMDIE) || | |
2678 | + fatal_signal_pending(current))) | |
2690 | 2679 | goto bypass; |
2691 | 2680 | |
2692 | 2681 | if (unlikely(task_in_memcg_oom(current))) |
... | ... | @@ -2695,14 +2684,6 @@ |
2695 | 2684 | if (gfp_mask & __GFP_NOFAIL) |
2696 | 2685 | oom = false; |
2697 | 2686 | again: |
2698 | - if (*ptr) { /* css should be a valid one */ | |
2699 | - memcg = *ptr; | |
2700 | - css_get(&memcg->css); | |
2701 | - } else { | |
2702 | - memcg = get_mem_cgroup_from_mm(mm); | |
2703 | - } | |
2704 | - if (mem_cgroup_is_root(memcg)) | |
2705 | - goto done; | |
2706 | 2687 | if (consume_stock(memcg, nr_pages)) |
2707 | 2688 | goto done; |
2708 | 2689 | |
2709 | 2690 | |
... | ... | @@ -2710,10 +2691,8 @@ |
2710 | 2691 | bool invoke_oom = oom && !nr_oom_retries; |
2711 | 2692 | |
2712 | 2693 | /* If killed, bypass charge */ |
2713 | - if (fatal_signal_pending(current)) { | |
2714 | - css_put(&memcg->css); | |
2694 | + if (fatal_signal_pending(current)) | |
2715 | 2695 | goto bypass; |
2716 | - } | |
2717 | 2696 | |
2718 | 2697 | ret = mem_cgroup_do_charge(memcg, gfp_mask, batch, |
2719 | 2698 | nr_pages, invoke_oom); |
2720 | 2699 | |
2721 | 2700 | |
2722 | 2701 | |
... | ... | @@ -2722,17 +2701,12 @@ |
2722 | 2701 | break; |
2723 | 2702 | case CHARGE_RETRY: /* not in OOM situation but retry */ |
2724 | 2703 | batch = nr_pages; |
2725 | - css_put(&memcg->css); | |
2726 | - memcg = NULL; | |
2727 | 2704 | goto again; |
2728 | 2705 | case CHARGE_WOULDBLOCK: /* !__GFP_WAIT */ |
2729 | - css_put(&memcg->css); | |
2730 | 2706 | goto nomem; |
2731 | 2707 | case CHARGE_NOMEM: /* OOM routine works */ |
2732 | - if (!oom || invoke_oom) { | |
2733 | - css_put(&memcg->css); | |
2708 | + if (!oom || invoke_oom) | |
2734 | 2709 | goto nomem; |
2735 | - } | |
2736 | 2710 | nr_oom_retries--; |
2737 | 2711 | break; |
2738 | 2712 | } |
2739 | 2713 | |
2740 | 2714 | |
2741 | 2715 | |
2742 | 2716 | |
... | ... | @@ -2741,19 +2715,43 @@ |
2741 | 2715 | if (batch > nr_pages) |
2742 | 2716 | refill_stock(memcg, batch - nr_pages); |
2743 | 2717 | done: |
2744 | - css_put(&memcg->css); | |
2745 | - *ptr = memcg; | |
2746 | 2718 | return 0; |
2747 | 2719 | nomem: |
2748 | - if (!(gfp_mask & __GFP_NOFAIL)) { | |
2749 | - *ptr = NULL; | |
2720 | + if (!(gfp_mask & __GFP_NOFAIL)) | |
2750 | 2721 | return -ENOMEM; |
2751 | - } | |
2752 | 2722 | bypass: |
2753 | - *ptr = root_mem_cgroup; | |
2754 | 2723 | return -EINTR; |
2755 | 2724 | } |
2756 | 2725 | |
2726 | +/** | |
2727 | + * mem_cgroup_try_charge_mm - try charging a mm | |
2728 | + * @mm: mm_struct to charge | |
2729 | + * @nr_pages: number of pages to charge | |
2730 | + * @oom: trigger OOM if reclaim fails | |
2731 | + * | |
2732 | + * Returns the charged mem_cgroup associated with the given mm_struct or | |
2733 | + * NULL if the charge failed. | |
2734 | + */ | |
2735 | +static struct mem_cgroup *mem_cgroup_try_charge_mm(struct mm_struct *mm, | |
2736 | + gfp_t gfp_mask, | |
2737 | + unsigned int nr_pages, | |
2738 | + bool oom) | |
2739 | + | |
2740 | +{ | |
2741 | + struct mem_cgroup *memcg; | |
2742 | + int ret; | |
2743 | + | |
2744 | + memcg = get_mem_cgroup_from_mm(mm); | |
2745 | + ret = mem_cgroup_try_charge(memcg, gfp_mask, nr_pages, oom); | |
2746 | + css_put(&memcg->css); | |
2747 | + if (ret == -EINTR) | |
2748 | + memcg = root_mem_cgroup; | |
2749 | + else if (ret) | |
2750 | + memcg = NULL; | |
2751 | + | |
2752 | + return memcg; | |
2753 | +} | |
2754 | + | |
2757 | 2755 | /* |
2758 | 2756 | * Somemtimes we have to undo a charge we got by try_charge(). |
2759 | 2757 | * This function is for that and do uncharge, put css's refcnt. |
2760 | 2758 | |
2761 | 2759 | |
... | ... | @@ -2949,20 +2947,17 @@ |
2949 | 2947 | static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size) |
2950 | 2948 | { |
2951 | 2949 | struct res_counter *fail_res; |
2952 | - struct mem_cgroup *_memcg; | |
2953 | 2950 | int ret = 0; |
2954 | 2951 | |
2955 | 2952 | ret = res_counter_charge(&memcg->kmem, size, &fail_res); |
2956 | 2953 | if (ret) |
2957 | 2954 | return ret; |
2958 | 2955 | |
2959 | - _memcg = memcg; | |
2960 | - ret = __mem_cgroup_try_charge(NULL, gfp, size >> PAGE_SHIFT, | |
2961 | - &_memcg, oom_gfp_allowed(gfp)); | |
2962 | - | |
2956 | + ret = mem_cgroup_try_charge(memcg, gfp, size >> PAGE_SHIFT, | |
2957 | + oom_gfp_allowed(gfp)); | |
2963 | 2958 | if (ret == -EINTR) { |
2964 | 2959 | /* |
2965 | - * __mem_cgroup_try_charge() chosed to bypass to root due to | |
2960 | + * mem_cgroup_try_charge() chose to bypass to root due to | |
2966 | 2961 | * OOM kill or fatal signal. Since our only options are to |
2967 | 2962 | * either fail the allocation or charge it to this cgroup, do |
2968 | 2963 | * it as a temporary condition. But we can't fail. From a |
... | ... | @@ -2972,7 +2967,7 @@ |
2972 | 2967 | * |
2973 | 2968 | * This condition will only trigger if the task entered |
2974 | 2969 | * memcg_charge_kmem in a sane state, but was OOM-killed during |
2975 | - * __mem_cgroup_try_charge() above. Tasks that were already | |
2970 | + * mem_cgroup_try_charge() above. Tasks that were already | |
2976 | 2971 | * dying when the allocation triggers should have been already |
2977 | 2972 | * directed to the root cgroup in memcontrol.h |
2978 | 2973 | */ |
2979 | 2974 | |
2980 | 2975 | |
... | ... | @@ -3826,10 +3821,9 @@ |
3826 | 3821 | int mem_cgroup_newpage_charge(struct page *page, |
3827 | 3822 | struct mm_struct *mm, gfp_t gfp_mask) |
3828 | 3823 | { |
3829 | - struct mem_cgroup *memcg = NULL; | |
3830 | 3824 | unsigned int nr_pages = 1; |
3825 | + struct mem_cgroup *memcg; | |
3831 | 3826 | bool oom = true; |
3832 | - int ret; | |
3833 | 3827 | |
3834 | 3828 | if (mem_cgroup_disabled()) |
3835 | 3829 | return 0; |
... | ... | @@ -3848,9 +3842,9 @@ |
3848 | 3842 | oom = false; |
3849 | 3843 | } |
3850 | 3844 | |
3851 | - ret = __mem_cgroup_try_charge(mm, gfp_mask, nr_pages, &memcg, oom); | |
3852 | - if (ret == -ENOMEM) | |
3853 | - return ret; | |
3845 | + memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, nr_pages, oom); | |
3846 | + if (!memcg) | |
3847 | + return -ENOMEM; | |
3854 | 3848 | __mem_cgroup_commit_charge(memcg, page, nr_pages, |
3855 | 3849 | MEM_CGROUP_CHARGE_TYPE_ANON, false); |
3856 | 3850 | return 0; |
... | ... | @@ -3867,7 +3861,7 @@ |
3867 | 3861 | gfp_t mask, |
3868 | 3862 | struct mem_cgroup **memcgp) |
3869 | 3863 | { |
3870 | - struct mem_cgroup *memcg; | |
3864 | + struct mem_cgroup *memcg = NULL; | |
3871 | 3865 | struct page_cgroup *pc; |
3872 | 3866 | int ret; |
3873 | 3867 | |
3874 | 3868 | |
3875 | 3869 | |
3876 | 3870 | |
3877 | 3871 | |
... | ... | @@ -3880,31 +3874,29 @@ |
3880 | 3874 | * in turn serializes uncharging. |
3881 | 3875 | */ |
3882 | 3876 | if (PageCgroupUsed(pc)) |
3883 | - return 0; | |
3884 | - if (!do_swap_account) | |
3885 | - goto charge_cur_mm; | |
3886 | - memcg = try_get_mem_cgroup_from_page(page); | |
3877 | + goto out; | |
3878 | + if (do_swap_account) | |
3879 | + memcg = try_get_mem_cgroup_from_page(page); | |
3887 | 3880 | if (!memcg) |
3888 | - goto charge_cur_mm; | |
3889 | - *memcgp = memcg; | |
3890 | - ret = __mem_cgroup_try_charge(NULL, mask, 1, memcgp, true); | |
3881 | + memcg = get_mem_cgroup_from_mm(mm); | |
3882 | + ret = mem_cgroup_try_charge(memcg, mask, 1, true); | |
3891 | 3883 | css_put(&memcg->css); |
3892 | 3884 | if (ret == -EINTR) |
3893 | - ret = 0; | |
3894 | - return ret; | |
3895 | -charge_cur_mm: | |
3896 | - ret = __mem_cgroup_try_charge(mm, mask, 1, memcgp, true); | |
3897 | - if (ret == -EINTR) | |
3898 | - ret = 0; | |
3899 | - return ret; | |
3885 | + memcg = root_mem_cgroup; | |
3886 | + else if (ret) | |
3887 | + return ret; | |
3888 | +out: | |
3889 | + *memcgp = memcg; | |
3890 | + return 0; | |
3900 | 3891 | } |
3901 | 3892 | |
3902 | 3893 | int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page, |
3903 | 3894 | gfp_t gfp_mask, struct mem_cgroup **memcgp) |
3904 | 3895 | { |
3905 | - *memcgp = NULL; | |
3906 | - if (mem_cgroup_disabled()) | |
3896 | + if (mem_cgroup_disabled()) { | |
3897 | + *memcgp = NULL; | |
3907 | 3898 | return 0; |
3899 | + } | |
3908 | 3900 | /* |
3909 | 3901 | * A racing thread's fault, or swapoff, may have already |
3910 | 3902 | * updated the pte, and even removed page from swap cache: in |
3911 | 3903 | |
... | ... | @@ -3912,12 +3904,13 @@ |
3912 | 3904 | * there's also a KSM case which does need to charge the page. |
3913 | 3905 | */ |
3914 | 3906 | if (!PageSwapCache(page)) { |
3915 | - int ret; | |
3907 | + struct mem_cgroup *memcg; | |
3916 | 3908 | |
3917 | - ret = __mem_cgroup_try_charge(mm, gfp_mask, 1, memcgp, true); | |
3918 | - if (ret == -EINTR) | |
3919 | - ret = 0; | |
3920 | - return ret; | |
3909 | + memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1, true); | |
3910 | + if (!memcg) | |
3911 | + return -ENOMEM; | |
3912 | + *memcgp = memcg; | |
3913 | + return 0; | |
3921 | 3914 | } |
3922 | 3915 | return __mem_cgroup_try_charge_swapin(mm, page, gfp_mask, memcgp); |
3923 | 3916 | } |
3924 | 3917 | |
... | ... | @@ -3964,8 +3957,8 @@ |
3964 | 3957 | int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, |
3965 | 3958 | gfp_t gfp_mask) |
3966 | 3959 | { |
3967 | - struct mem_cgroup *memcg = NULL; | |
3968 | 3960 | enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE; |
3961 | + struct mem_cgroup *memcg; | |
3969 | 3962 | int ret; |
3970 | 3963 | |
3971 | 3964 | if (mem_cgroup_disabled()) |
3972 | 3965 | |
3973 | 3966 | |
... | ... | @@ -3973,23 +3966,28 @@ |
3973 | 3966 | if (PageCompound(page)) |
3974 | 3967 | return 0; |
3975 | 3968 | |
3976 | - if (!PageSwapCache(page)) { | |
3977 | - /* | |
3978 | - * Page cache insertions can happen without an actual | |
3979 | - * task context, e.g. during disk probing on boot. | |
3980 | - */ | |
3981 | - if (!mm) | |
3982 | - memcg = root_mem_cgroup; | |
3983 | - ret = __mem_cgroup_try_charge(mm, gfp_mask, 1, &memcg, true); | |
3984 | - if (ret != -ENOMEM) | |
3985 | - __mem_cgroup_commit_charge(memcg, page, 1, type, false); | |
3986 | - } else { /* page is swapcache/shmem */ | |
3969 | + if (PageSwapCache(page)) { /* shmem */ | |
3987 | 3970 | ret = __mem_cgroup_try_charge_swapin(mm, page, |
3988 | 3971 | gfp_mask, &memcg); |
3989 | - if (!ret) | |
3990 | - __mem_cgroup_commit_charge_swapin(page, memcg, type); | |
3972 | + if (ret) | |
3973 | + return ret; | |
3974 | + __mem_cgroup_commit_charge_swapin(page, memcg, type); | |
3975 | + return 0; | |
3991 | 3976 | } |
3992 | - return ret; | |
3977 | + | |
3978 | + /* | |
3979 | + * Page cache insertions can happen without an actual mm | |
3980 | + * context, e.g. during disk probing on boot. | |
3981 | + */ | |
3982 | + if (unlikely(!mm)) | |
3983 | + memcg = root_mem_cgroup; | |
3984 | + else { | |
3985 | + memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1, true); | |
3986 | + if (!memcg) | |
3987 | + return -ENOMEM; | |
3988 | + } | |
3989 | + __mem_cgroup_commit_charge(memcg, page, 1, type, false); | |
3990 | + return 0; | |
3993 | 3991 | } |
3994 | 3992 | |
3995 | 3993 | static void mem_cgroup_do_uncharge(struct mem_cgroup *memcg, |
... | ... | @@ -6601,8 +6599,7 @@ |
6601 | 6599 | batch_count = PRECHARGE_COUNT_AT_ONCE; |
6602 | 6600 | cond_resched(); |
6603 | 6601 | } |
6604 | - ret = __mem_cgroup_try_charge(NULL, | |
6605 | - GFP_KERNEL, 1, &memcg, false); | |
6602 | + ret = mem_cgroup_try_charge(memcg, GFP_KERNEL, 1, false); | |
6606 | 6603 | if (ret) |
6607 | 6604 | /* mem_cgroup_clear_mc() will do uncharge later */ |
6608 | 6605 | return ret; |