Commit 2a4073c2bb288193f5e7a0d57e9cf2f9786dddc3

Authored by Hugh Dickins
Committed by Greg Kroah-Hartman
Parent: 671f9c9e2f

SHM_UNLOCK: fix long unpreemptible section

commit 85046579bde15e532983438f86b36856e358f417 upstream.

scan_mapping_unevictable_pages() is used to make SysV SHM_LOCKed pages
evictable again once the shared memory is unlocked.  It does this with
pagevec_lookup()s across the whole object (which might occupy most of
memory), and takes 300ms to unlock 7GB here.  A cond_resched() every
PAGEVEC_SIZE pages would be good.
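
The natural place for it is at the end of each pagevec_lookup() batch, e.g.
(a sketch of the loop's shape only; check_pagevec() here is a hypothetical
stand-in for the real per-page work in mm/vmscan.c):

	while (next < end &&
	       pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
		pg_scanned = check_pagevec(&pvec, &next);	/* hypothetical helper */
		pagevec_release(&pvec);
		count_vm_events(UNEVICTABLE_PGSCANNED, pg_scanned);
		cond_resched();		/* reschedule once per PAGEVEC_SIZE batch */
	}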

However, KOSAKI-san points out that this is called under shmem.c's
info->lock, and it's also under shm.c's shm_lock(), both spinlocks.
There is no strong reason for that: we need to take these pages off the
unevictable list soonish, but those locks are not required for it.

So move the call to scan_mapping_unevictable_pages() from shmem.c's
unlock handling up to shm.c's unlock handling.  Remove the recently
added barrier, not needed now we have spin_unlock() before the scan.

Use get_file(), with subsequent fput(), to make sure we have a reference
to mapping throughout scan_mapping_unevictable_pages(): that's something
that was previously guaranteed by the shm_lock().
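
Concretely, the new SHM_UNLOCK tail of shmctl() reads as below (lifted from
the ipc/shm.c hunk in the diff, with explanatory comments added); it also
shows the SHM_LOCKED check mentioned further down:

		/* SHM_UNLOCK: nothing to do unless it was SHM_LOCKed */
		if (!(shp->shm_perm.mode & SHM_LOCKED))
			goto out_unlock;
		shmem_lock(shm_file, 0, shp->mlock_user);
		shp->shm_perm.mode &= ~SHM_LOCKED;
		shp->mlock_user = NULL;
		get_file(shm_file);	/* hold a reference to the mapping across the scan */
		shm_unlock(shp);	/* drop the spinlock before scanning */
		scan_mapping_unevictable_pages(shm_file->f_mapping);
		fput(shm_file);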

Remove shmctl's lru_add_drain_all(): we don't fault in pages at SHM_LOCK
time, and we lazily discover them to be Unevictable later, so it serves
no purpose for SHM_LOCK; and serves no purpose for SHM_UNLOCK, since
pages still on pagevec are not marked Unevictable.

The original code avoided redundant rescans by checking VM_LOCKED flag
at its level: now avoid them by checking shp's SHM_LOCKED.

The original code called scan_mapping_unevictable_pages() on a locked
area at shm_destroy() time: perhaps we once had accounting cross-checks
which required that, but not now, so skip the overhead and just let
inode eviction deal with them.

Put check_move_unevictable_page() and scan_mapping_unevictable_pages()
under CONFIG_SHMEM (with stub for the TINY case when ramfs is used),
more as comment than to save space; comment them used for SHM_UNLOCK.
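
In the !CONFIG_SHMEM (tiny shmem, ramfs-backed) case the scan collapses to an
empty stub, as in the last mm/vmscan.c hunk below:

#ifdef CONFIG_SHMEM
/* real check_move_unevictable_page() and scan_mapping_unevictable_pages() */
#else
void scan_mapping_unevictable_pages(struct address_space *mapping)
{
}
#endif /* CONFIG_SHMEM */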

Signed-off-by: Hugh Dickins <hughd@google.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Shaohua Li <shaohua.li@intel.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michel Lespinasse <walken@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

Showing 3 changed files with 33 additions and 23 deletions

ipc/shm.c

@@ -870,10 +870,8 @@
 	case SHM_LOCK:
 	case SHM_UNLOCK:
 	{
-		struct file *uninitialized_var(shm_file);
+		struct file *shm_file;
 
-		lru_add_drain_all();  /* drain pagevecs to lru lists */
-
 		shp = shm_lock_check(ns, shmid);
 		if (IS_ERR(shp)) {
 			err = PTR_ERR(shp);
@@ -895,22 +893,31 @@
 		err = security_shm_shmctl(shp, cmd);
 		if (err)
 			goto out_unlock;
-
-		if(cmd==SHM_LOCK) {
+
+		shm_file = shp->shm_file;
+		if (is_file_hugepages(shm_file))
+			goto out_unlock;
+
+		if (cmd == SHM_LOCK) {
 			struct user_struct *user = current_user();
-			if (!is_file_hugepages(shp->shm_file)) {
-				err = shmem_lock(shp->shm_file, 1, user);
-				if (!err && !(shp->shm_perm.mode & SHM_LOCKED)){
-					shp->shm_perm.mode |= SHM_LOCKED;
-					shp->mlock_user = user;
-				}
+			err = shmem_lock(shm_file, 1, user);
+			if (!err && !(shp->shm_perm.mode & SHM_LOCKED)) {
+				shp->shm_perm.mode |= SHM_LOCKED;
+				shp->mlock_user = user;
 			}
-		} else if (!is_file_hugepages(shp->shm_file)) {
-			shmem_lock(shp->shm_file, 0, shp->mlock_user);
-			shp->shm_perm.mode &= ~SHM_LOCKED;
-			shp->mlock_user = NULL;
+			goto out_unlock;
 		}
+
+		/* SHM_UNLOCK */
+		if (!(shp->shm_perm.mode & SHM_LOCKED))
+			goto out_unlock;
+		shmem_lock(shm_file, 0, shp->mlock_user);
+		shp->shm_perm.mode &= ~SHM_LOCKED;
+		shp->mlock_user = NULL;
+		get_file(shm_file);
 		shm_unlock(shp);
+		scan_mapping_unevictable_pages(shm_file->f_mapping);
+		fput(shm_file);
 		goto out;
 	}
 	case IPC_RMID:
mm/shmem.c

@@ -1068,13 +1068,6 @@
 		user_shm_unlock(inode->i_size, user);
 		info->flags &= ~VM_LOCKED;
 		mapping_clear_unevictable(file->f_mapping);
-		/*
-		 * Ensure that a racing putback_lru_page() can see
-		 * the pages of this mapping are evictable when we
-		 * skip them due to !PageLRU during the scan.
-		 */
-		smp_mb__after_clear_bit();
-		scan_mapping_unevictable_pages(file->f_mapping);
 	}
 	retval = 0;
 
mm/vmscan.c

@@ -3353,6 +3353,7 @@
 	return 1;
 }
 
+#ifdef CONFIG_SHMEM
 /**
  * check_move_unevictable_page - check page for evictability and move to appropriate zone lru list
  * @page: page to check evictability and move to appropriate lru list
@@ -3363,6 +3364,8 @@
  *
  * Restrictions: zone->lru_lock must be held, page must be on LRU and must
  * have PageUnevictable set.
+ *
+ * This function is only used for SysV IPC SHM_UNLOCK.
  */
 static void check_move_unevictable_page(struct page *page, struct zone *zone)
 {
@@ -3396,6 +3399,8 @@
  *
  * Scan all pages in mapping. Check unevictable pages for
  * evictability and move them to the appropriate zone lru list.
+ *
+ * This function is only used for SysV IPC SHM_UNLOCK.
  */
 void scan_mapping_unevictable_pages(struct address_space *mapping)
 {
@@ -3441,9 +3446,14 @@
 		pagevec_release(&pvec);
 
 		count_vm_events(UNEVICTABLE_PGSCANNED, pg_scanned);
+		cond_resched();
 	}
-
 }
+#else
+void scan_mapping_unevictable_pages(struct address_space *mapping)
+{
+}
+#endif /* CONFIG_SHMEM */
 
 static void warn_scan_unevictable_pages(void)
 {