- commit f769193bec18d1384ff8b2094088d46972da1673
- Author: Juhyung Park <qkrwngud825@gmail.com>
- Date: Fri May 20 07:00:12 2022 +0900
- Samsung
- Signed-off-by: Juhyung Park <qkrwngud825@gmail.com>
- diff --git a/mm/page_alloc.c b/mm/page_alloc.c
- index 46d253155a255..f96d8044b5a3d 100644
- --- a/mm/page_alloc.c
- +++ b/mm/page_alloc.c
- @@ -64,12 +64,12 @@
- #include <linux/page_owner.h>
- #include <linux/kthread.h>
- #include <linux/memcontrol.h>
- -#include <linux/show_mem_notifier.h>
- #include <linux/ftrace.h>
- #include <linux/lockdep.h>
- #include <linux/nmi.h>
- #include <linux/psi.h>
- #include <linux/khugepaged.h>
- +#include <linux/sched/cputime.h>
- #include <asm/sections.h>
- #include <asm/tlbflush.h>
- @@ -81,6 +81,36 @@
- static DEFINE_MUTEX(pcp_batch_high_lock);
- #define MIN_PERCPU_PAGELIST_FRACTION (8)
- +/* If RANK_BIT position in physical address is zero, it is main rank */
- +#define is_main_rank(page) !rankid(page)
- +
- +static inline void rank_list_add(struct page *page, struct list_head *list)
- +{
- + if (is_main_rank(page))
- + list_add(&(page)->lru, list);
- + else
- + list_add_tail(&(page)->lru, list);
- +}
- +
- +static inline void rank_free_area_add(struct page *page, struct free_area *area,
- + int migratetype)
- +{
- + if (is_main_rank(page))
- + add_to_free_area(page, area, migratetype);
- + else
- + add_to_free_area_tail(page, area, migratetype);
- +}
- +
- +static inline void rank_free_area_move(struct page *page,
- + struct free_area *area,
- + int migratetype)
- +{
- + if (is_main_rank(page))
- + move_to_free_area(page, area, migratetype);
- + else
- + move_to_free_area_tail(page, area, migratetype);
- +}
- +
- #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID
- DEFINE_PER_CPU(int, numa_node);
- EXPORT_PER_CPU_SYMBOL(numa_node);
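Note: the helpers above lean on rankid() and RANK_BIT, neither of which appears in this diff; presumably they are defined elsewhere in the tree and test a rank-select bit in the page's physical address, as the comment "If RANK_BIT position in physical address is zero, it is main rank" suggests. A minimal sketch of what such a helper could look like (the bit position is an invented example, not taken from this patch):

    /* Illustrative sketch: RANK_BIT's position is an assumption, not from this patch. */
    #define RANK_BIT        33

    static inline int rankid(struct page *page)
    {
            phys_addr_t pa = page_to_phys(page);

            return (int)((pa >> RANK_BIT) & 0x1);   /* 0: main rank, 1: backup rank */
    }

With something like that in place, main-rank pages are queued at the head of the buddy and per-cpu freelists and backup-rank pages at the tail, so allocations are biased toward the main DRAM rank, presumably so the backup rank can stay idle longer.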
- @@ -187,24 +217,6 @@ static int __init early_init_on_free(char *buf)
- }
- early_param("init_on_free", early_init_on_free);
- -/*
- - * A cached value of the page's pageblock's migratetype, used when the page is
- - * put on a pcplist. Used to avoid the pageblock migratetype lookup when
- - * freeing from pcplists in most cases, at the cost of possibly becoming stale.
- - * Also the migratetype set in the page does not necessarily match the pcplist
- - * index, e.g. page might have MIGRATE_CMA set but be on a pcplist with any
- - * other index - this ensures that it will be put on the correct CMA freelist.
- - */
- -static inline int get_pcppage_migratetype(struct page *page)
- -{
- - return page->index;
- -}
- -
- -static inline void set_pcppage_migratetype(struct page *page, int migratetype)
- -{
- - page->index = migratetype;
- -}
- -
- #ifdef CONFIG_PM_SLEEP
- /*
- * The following functions are used by the suspend/hibernate code to temporarily
- @@ -919,7 +931,7 @@ static inline void __free_one_page(struct page *page,
- unsigned int max_order;
- struct capture_control *capc = task_capc(zone);
- - max_order = min_t(unsigned int, MAX_ORDER, pageblock_order + 1);
- + max_order = min_t(unsigned int, MAX_ORDER - 1, pageblock_order);
- VM_BUG_ON(!zone_is_initialized(zone));
- VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page);
- @@ -932,7 +944,7 @@ static inline void __free_one_page(struct page *page,
- VM_BUG_ON_PAGE(bad_range(zone, page), page);
- continue_merging:
- - while (order < max_order - 1) {
- + while (order < max_order) {
- if (compaction_capture(capc, page, order, migratetype)) {
- __mod_zone_freepage_state(zone, -(1 << order),
- migratetype);
- @@ -958,7 +970,7 @@ continue_merging:
- pfn = combined_pfn;
- order++;
- }
- - if (max_order < MAX_ORDER) {
- + if (order < MAX_ORDER - 1) {
- /* If we are here, it means order is >= pageblock_order.
- * We want to prevent merge between freepages on isolate
- * pageblock and normal pageblock. Without this, pageblock
- @@ -979,7 +991,7 @@ continue_merging:
- is_migrate_isolate(buddy_mt)))
- goto done_merging;
- }
- - max_order++;
- + max_order = order + 1;
- goto continue_merging;
- }
- @@ -1003,17 +1015,17 @@ done_merging:
- higher_buddy = higher_page + (buddy_pfn - combined_pfn);
- if (pfn_valid_within(buddy_pfn) &&
- page_is_buddy(higher_page, higher_buddy, order + 1)) {
- - add_to_free_area_tail(page, &zone->free_area[order],
- - migratetype);
- + rank_free_area_add(page, &zone->free_area[order],
- + migratetype);
- return;
- }
- }
- if (is_shuffle_order(order))
- add_to_free_area_random(page, &zone->free_area[order],
- - migratetype);
- + migratetype);
- else
- - add_to_free_area(page, &zone->free_area[order], migratetype);
- + rank_free_area_add(page, &zone->free_area[order], migratetype);
- }
- @@ -1430,15 +1442,35 @@ void __meminit reserve_bootmem_region(phys_addr_t start, phys_addr_t end)
- }
- }
- +#ifdef CONFIG_HUGEPAGE_POOL
- +static void __free_pages_ok(struct page *page, unsigned int order)
- +{
- + ___free_pages_ok(page, order, false);
- +}
- +
- +void ___free_pages_ok(struct page *page, unsigned int order,
- + bool skip_hugepage_pool)
- +#else
- static void __free_pages_ok(struct page *page, unsigned int order)
- +#endif
- {
- unsigned long flags;
- int migratetype;
- unsigned long pfn = page_to_pfn(page);
- +#ifdef CONFIG_HUGEPAGE_POOL
- + if (!skip_hugepage_pool && !free_pages_prepare(page, order, true))
- + return;
- +#else
- if (!free_pages_prepare(page, order, true))
- return;
- +#endif
- +#ifdef CONFIG_HUGEPAGE_POOL
- + if (!skip_hugepage_pool && order == HUGEPAGE_ORDER &&
- + insert_hugepage_pool(page, order))
- + return;
- +#endif
- migratetype = get_pfnblock_migratetype(page, pfn);
- local_irq_save(flags);
- __count_vm_events(PGFREE, 1 << order);
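With CONFIG_HUGEPAGE_POOL, frees of order HUGEPAGE_ORDER can be captured by insert_hugepage_pool() instead of returning to the buddy allocator, and ___free_pages_ok() gains a skip_hugepage_pool flag so the pool itself can release pages without re-capturing them. Neither insert_hugepage_pool() nor HUGEPAGE_ORDER is defined in this diff; a hypothetical pool-side release path might look like:

    /* Hypothetical sketch: only ___free_pages_ok() comes from this patch. */
    static void hugepage_pool_release(struct page *page)
    {
            /* skip_hugepage_pool = true bypasses the capture check above */
            ___free_pages_ok(page, HUGEPAGE_ORDER, true);
    }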
- @@ -2049,7 +2081,7 @@ static inline void expand(struct zone *zone, struct page *page,
- if (set_page_guard(zone, &page[size], high, migratetype))
- continue;
- - add_to_free_area(&page[size], area, migratetype);
- + rank_free_area_add(&page[size], area, migratetype);
- set_page_order(&page[size], high);
- }
- }
- @@ -2166,8 +2198,13 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
- set_page_owner(page, order, gfp_flags);
- }
- +#ifdef CONFIG_HUGEPAGE_POOL
- +void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
- + unsigned int alloc_flags)
- +#else
- static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
- unsigned int alloc_flags)
- +#endif
- {
- post_alloc_hook(page, order, gfp_flags);
- @@ -2190,14 +2227,18 @@ static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags
- }
- /*
- - * Go through the free lists for the given migratetype and remove
- - * the smallest available page from the freelists
- + * Search the free lists from requested order to MAX_ORDER to find
- + * the main rank page and returns the order if exists.
- + * If main rank page doesn't exist, returns the smallest order of
- + * available backup rank page.
- + *
- + * MAX_ORDER is returned if there's no available pages.
- */
- static __always_inline
- -struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
- - int migratetype)
- +unsigned int __get_min_rank_aware_order(struct zone *zone,
- + unsigned int order, int migratetype)
- {
- - unsigned int current_order;
- + unsigned int current_order, backup_order = MAX_ORDER;
- struct free_area *area;
- struct page *page;
- @@ -2205,15 +2246,36 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
- for (current_order = order; current_order < MAX_ORDER; ++current_order) {
- area = &(zone->free_area[current_order]);
- page = get_page_from_free_area(area, migratetype);
- - if (!page)
- - continue;
- - del_page_from_free_area(page, area);
- - expand(zone, page, order, current_order, area, migratetype);
- - set_pcppage_migratetype(page, migratetype);
- - return page;
- + if (page) {
- + if (is_main_rank(page))
- + return current_order;
- + if (backup_order == MAX_ORDER)
- + backup_order = current_order;
- + }
- }
- - return NULL;
- + return backup_order;
- +}
- +
- +static __always_inline
- +struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
- + int migratetype)
- +{
- + unsigned int current_order;
- + struct free_area *area;
- + struct page *page;
- +
- + current_order = __get_min_rank_aware_order(zone, order, migratetype);
- + if (current_order == MAX_ORDER)
- + return NULL;
- +
- + area = &(zone->free_area[current_order]);
- + page = get_page_from_free_area(area, migratetype);
- + del_page_from_free_area(page, area);
- + expand(zone, page, order, current_order, area, migratetype);
- + set_pcppage_migratetype(page, migratetype);
- +
- + return page;
- }
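The rank-aware search above prefers a main-rank page even if it only exists at a higher order, and otherwise falls back to the smallest order that holds any (backup-rank) page, returning MAX_ORDER when nothing is free. The same selection logic on a toy model, as a self-contained user-space sketch (the array and constants are invented for illustration):

    #include <stdio.h>

    #define TOY_MAX_ORDER 5

    /* Rank of the page at the head of each order's freelist:
     * -1 = empty, 0 = main rank, 1 = backup rank. */
    static const int head_rank[TOY_MAX_ORDER] = { -1, 1, -1, 0, 1 };

    static unsigned int toy_min_rank_aware_order(unsigned int order)
    {
            unsigned int cur, backup = TOY_MAX_ORDER;

            for (cur = order; cur < TOY_MAX_ORDER; cur++) {
                    if (head_rank[cur] < 0)
                            continue;               /* nothing free at this order */
                    if (head_rank[cur] == 0)
                            return cur;             /* main-rank page: take it now */
                    if (backup == TOY_MAX_ORDER)
                            backup = cur;           /* remember smallest backup order */
            }
            return backup;                          /* TOY_MAX_ORDER if nothing found */
    }

    int main(void)
    {
            /* Order-1 request: order 1 only has a backup-rank page, order 3 has a
             * main-rank page, so the rank-aware search picks order 3. */
            printf("%u\n", toy_min_rank_aware_order(1));
            return 0;
    }

Here an order-1 request skips the backup-rank page at order 1 in favour of the main-rank page at order 3; __rmqueue_smallest() then splits the larger block with expand() as usual.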
- @@ -2282,7 +2344,7 @@ static int move_freepages(struct zone *zone,
- VM_BUG_ON_PAGE(page_zone(page) != zone, page);
- order = page_order(page);
- - move_to_free_area(page, &zone->free_area[order], migratetype);
- + rank_free_area_move(page, &zone->free_area[order], migratetype);
- page += 1 << order;
- pages_moved += 1 << order;
- }
- @@ -2359,38 +2421,11 @@ static bool can_steal_fallback(unsigned int order, int start_mt)
- return false;
- }
- -static bool boost_eligible(struct zone *z)
- -{
- - unsigned long high_wmark, threshold;
- - unsigned long reclaim_eligible, free_pages;
- -
- - high_wmark = z->_watermark[WMARK_HIGH];
- - reclaim_eligible = zone_page_state_snapshot(z, NR_ZONE_INACTIVE_FILE) +
- - zone_page_state_snapshot(z, NR_ZONE_ACTIVE_FILE);
- - free_pages = zone_page_state(z, NR_FREE_PAGES) -
- - zone_page_state(z, NR_FREE_CMA_PAGES);
- - threshold = high_wmark + (2 * mult_frac(high_wmark,
- - watermark_boost_factor, 10000));
- -
- - /*
- - * Don't boost watermark If we are already low on memory where the
- - * boosting can simply put the watermarks at higher levels for a
- - * longer duration of time and thus the other users relied on the
- - * watermarks are forced to choose unintended decissions. If memory
- - * is so low, kswapd in normal mode should help.
- - */
- -
- - if (reclaim_eligible < threshold && free_pages < threshold)
- - return false;
- -
- - return true;
- -}
- -
- static inline bool boost_watermark(struct zone *zone)
- {
- unsigned long max_boost;
- - if (!watermark_boost_factor || !boost_eligible(zone))
- + if (!watermark_boost_factor)
- return false;
- /*
- * Don't bother in zones that are unlikely to produce results.
- @@ -2506,7 +2541,7 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page,
- single_page:
- area = &zone->free_area[current_order];
- - move_to_free_area(page, area, start_type);
- + rank_free_area_move(page, area, start_type);
- }
- /*
- @@ -2837,7 +2872,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
- * for IO devices that can merge IO requests if the physical
- * pages are ordered properly.
- */
- - list_add_tail(&page->lru, list);
- + rank_list_add(page, list);
- alloced++;
- if (is_migrate_cma(get_pcppage_migratetype(page)))
- __mod_zone_page_state(zone, NR_FREE_CMA_PAGES,
- @@ -3143,7 +3178,7 @@ static void free_unref_page_commit(struct page *page, unsigned long pfn)
- }
- pcp = &this_cpu_ptr(zone->pageset)->pcp;
- - list_add(&page->lru, &pcp->lists[migratetype]);
- + rank_list_add(page, &pcp->lists[migratetype]);
- pcp->count++;
- if (pcp->count >= pcp->high) {
- unsigned long batch = READ_ONCE(pcp->batch);
- @@ -3329,7 +3364,6 @@ static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
- if (unlikely(list == NULL) ||
- unlikely(list_empty(list)))
- return NULL;
- -
- }
- page = list_first_entry(list, struct page, lru);
- @@ -3402,7 +3436,6 @@ struct page *rmqueue(struct zone *preferred_zone,
- if (!page)
- page = __rmqueue(zone, order, migratetype, alloc_flags);
- -
- } while (page && check_new_pages(page, order));
- spin_unlock(&zone->lock);
- @@ -3498,6 +3531,29 @@ static inline bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
- #endif /* CONFIG_FAIL_PAGE_ALLOC */
- +static inline long __zone_watermark_unusable_free(struct zone *z,
- + unsigned int order, unsigned int alloc_flags)
- +{
- + const bool alloc_harder = (alloc_flags & (ALLOC_HARDER|ALLOC_OOM));
- + long unusable_free = (1 << order) - 1;
- +
- + /*
- + * If the caller does not have rights to ALLOC_HARDER then subtract
- + * the high-atomic reserves. This will over-estimate the size of the
- + * atomic reserve but it avoids a search.
- + */
- + if (likely(!alloc_harder))
- + unusable_free += z->nr_reserved_highatomic;
- +
- +#ifdef CONFIG_CMA
- + /* If allocation can't use CMA areas don't use free CMA pages */
- + if (!(alloc_flags & ALLOC_CMA))
- + unusable_free += zone_page_state(z, NR_FREE_CMA_PAGES);
- +#endif
- +
- + return unusable_free;
- +}
- +
- noinline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
- {
- return __should_fail_alloc_page(gfp_mask, order);
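To make the accounting concrete with invented numbers: for an order-2 request without ALLOC_HARDER and without ALLOC_CMA, with nr_reserved_highatomic = 1024 and 2048 free CMA pages, unusable_free = (1 << 2) - 1 + 1024 + 2048 = 3075 pages. Both __zone_watermark_ok() and zone_watermark_fast() below subtract this from free_pages before comparing against the watermark plus the lowmem reserve, so the CMA and high-atomic handling that used to be open-coded in each caller now lives in one helper.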
- @@ -3519,19 +3575,12 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
- const bool alloc_harder = (alloc_flags & (ALLOC_HARDER|ALLOC_OOM));
- /* free_pages may go negative - that's OK */
- - free_pages -= (1 << order) - 1;
- + free_pages -= __zone_watermark_unusable_free(z, order, alloc_flags);
- if (alloc_flags & ALLOC_HIGH)
- min -= min / 2;
- - /*
- - * If the caller does not have rights to ALLOC_HARDER then subtract
- - * the high-atomic reserves. This will over-estimate the size of the
- - * atomic reserve but it avoids a search.
- - */
- - if (likely(!alloc_harder)) {
- - free_pages -= z->nr_reserved_highatomic;
- - } else {
- + if (unlikely(alloc_harder)) {
- /*
- * OOM victims can try even harder than normal ALLOC_HARDER
- * users on the grounds that it's definitely going to be in
- @@ -3544,13 +3593,6 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
- min -= min / 4;
- }
- -
- -#ifdef CONFIG_CMA
- - /* If allocation can't use CMA areas don't use free CMA pages */
- - if (!(alloc_flags & ALLOC_CMA))
- - free_pages -= zone_page_state(z, NR_FREE_CMA_PAGES);
- -#endif
- -
- /*
- * Check watermarks for an order-0 allocation request. If these
- * are not met, then a high-order request also cannot go ahead
- @@ -3572,14 +3614,6 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
- continue;
- for (mt = 0; mt < MIGRATE_PCPTYPES; mt++) {
- -#ifdef CONFIG_CMA
- - /*
- - * Note that this check is needed only
- - * when MIGRATE_CMA < MIGRATE_PCPTYPES.
- - */
- - if (mt == MIGRATE_CMA)
- - continue;
- -#endif
- if (!free_area_empty(area, mt))
- return true;
- }
- @@ -3608,24 +3642,22 @@ static inline bool zone_watermark_fast(struct zone *z, unsigned int order,
- unsigned long mark, int classzone_idx,
- unsigned int alloc_flags, gfp_t gfp_mask)
- {
- - long free_pages = zone_page_state(z, NR_FREE_PAGES);
- - long cma_pages = 0;
- + long free_pages;
- -#ifdef CONFIG_CMA
- - /* If allocation can't use CMA areas don't use free CMA pages */
- - if (!(alloc_flags & ALLOC_CMA))
- - cma_pages = zone_page_state(z, NR_FREE_CMA_PAGES);
- -#endif
- + free_pages = zone_page_state(z, NR_FREE_PAGES);
- /*
- * Fast check for order-0 only. If this fails then the reserves
- - * need to be calculated. There is a corner case where the check
- - * passes but only the high-order atomic reserve are free. If
- - * the caller is !atomic then it'll uselessly search the free
- - * list. That corner case is then slower but it is harmless.
- + * need to be calculated.
- */
- - if (!order && (free_pages - cma_pages) > mark + z->lowmem_reserve[classzone_idx])
- - return true;
- + if (!order) {
- + long fast_free;
- +
- + fast_free = free_pages;
- + fast_free -= __zone_watermark_unusable_free(z, 0, alloc_flags);
- + if (fast_free > mark + z->lowmem_reserve[classzone_idx])
- + return true;
- + }
- if (__zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
- free_pages))
- @@ -3783,20 +3815,6 @@ retry:
- }
- mark = wmark_pages(zone, alloc_flags & ALLOC_WMARK_MASK);
- - /*
- - * Allow high, atomic, harder order-0 allocation requests
- - * to skip the ->watermark_boost for min watermark check.
- - * In doing so, check for:
- - * 1) ALLOC_WMARK_MIN - Allow to wake up kswapd in the
- - * slow path.
- - * 2) ALLOC_HIGH - Allow high priority requests.
- - * 3) ALLOC_HARDER - Allow (__GFP_ATOMIC && !__GFP_NOMEMALLOC),
- - * of the others.
- - */
- - if (unlikely(!order && !(alloc_flags & ALLOC_WMARK_MASK) &&
- - (alloc_flags & (ALLOC_HARDER | ALLOC_HIGH)))) {
- - mark = zone->_watermark[WMARK_MIN];
- - }
- if (!zone_watermark_fast(zone, order, mark,
- ac_classzone_idx(ac), alloc_flags,
- gfp_mask)) {
- @@ -3893,7 +3911,6 @@ static void warn_alloc_show_mem(gfp_t gfp_mask, nodemask_t *nodemask)
- filter &= ~SHOW_MEM_FILTER_NODES;
- show_mem(filter, nodemask);
- - show_mem_call_notifiers();
- }
- void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...)
- @@ -4308,7 +4325,8 @@ retry:
- */
- if (!page && !drained) {
- unreserve_highatomic_pageblock(ac, false);
- - drain_all_pages(NULL);
- + if (!need_memory_boosting(NULL))
- + drain_all_pages(NULL);
- drained = true;
- goto retry;
- }
- @@ -4367,8 +4385,7 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
- alloc_flags |= ALLOC_KSWAPD;
- #ifdef CONFIG_CMA
- - if ((gfpflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE) &&
- - (gfp_mask & __GFP_CMA))
- + if (gfpflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
- alloc_flags |= ALLOC_CMA;
- #endif
- return alloc_flags;
- @@ -4558,13 +4575,19 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
- const bool costly_order = order > PAGE_ALLOC_COSTLY_ORDER;
- struct page *page = NULL;
- unsigned int alloc_flags;
- - unsigned long did_some_progress;
- + unsigned long did_some_progress = 0;
- enum compact_priority compact_priority;
- enum compact_result compact_result;
- int compaction_retries;
- int no_progress_loops;
- unsigned int cpuset_mems_cookie;
- int reserve_flags;
- + unsigned long pages_reclaimed = 0;
- + int retry_loop_count = 0;
- + unsigned long jiffies_s = jiffies;
- + u64 utime, stime_s, stime_e, stime_d;
- +
- + task_cputime(current, &utime, &stime_s);
- /*
- * We also sanity check to catch abuse of atomic reserves being used by
- @@ -4679,6 +4702,7 @@ retry_cpuset:
- }
- retry:
- + retry_loop_count++;
- /* Ensure kswapd doesn't accidentally go to sleep as long as we loop */
- if (alloc_flags & ALLOC_KSWAPD)
- wake_all_kswapds(order, gfp_mask, ac);
- @@ -4711,13 +4735,10 @@ retry:
- if (current->flags & PF_MEMALLOC)
- goto nopage;
- - if (fatal_signal_pending(current) && !(gfp_mask & __GFP_NOFAIL) &&
- - (gfp_mask & __GFP_FS))
- - goto nopage;
- -
- /* Try direct reclaim and then allocating */
- page = __alloc_pages_direct_reclaim(gfp_mask, order, alloc_flags, ac,
- &did_some_progress);
- + pages_reclaimed += did_some_progress;
- if (page)
- goto got_pg;
- @@ -4825,6 +4846,29 @@ fail:
- warn_alloc(gfp_mask, ac->nodemask,
- "page allocation failure: order:%u", order);
- got_pg:
- + task_cputime(current, &utime, &stime_e);
- + stime_d = stime_e - stime_s;
- + if (stime_d / NSEC_PER_MSEC > 256) {
- + pg_data_t *pgdat;
- +
- + unsigned long a_anon = 0;
- + unsigned long in_anon = 0;
- + unsigned long a_file = 0;
- + unsigned long in_file = 0;
- + for_each_online_pgdat(pgdat) {
- + a_anon += node_page_state(pgdat, NR_ACTIVE_ANON);
- + in_anon += node_page_state(pgdat, NR_INACTIVE_ANON);
- + a_file += node_page_state(pgdat, NR_ACTIVE_FILE);
- + in_file += node_page_state(pgdat, NR_INACTIVE_FILE);
- + }
- + pr_info("alloc stall: timeJS(ms):%u|%llu rec:%lu|%lu ret:%d o:%d gfp:%#x(%pGg) AaiFai:%lukB|%lukB|%lukB|%lukB\n",
- + jiffies_to_msecs(jiffies - jiffies_s),
- + stime_d / NSEC_PER_MSEC,
- + did_some_progress, pages_reclaimed, retry_loop_count,
- + order, gfp_mask, &gfp_mask,
- + a_anon << (PAGE_SHIFT-10), in_anon << (PAGE_SHIFT-10),
- + a_file << (PAGE_SHIFT-10), in_file << (PAGE_SHIFT-10));
- + }
- return page;
- }
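stime_d is in nanoseconds, so this report only fires when the allocation burned more than 256 ms of system CPU time; the wall-clock side is reported separately via jiffies_to_msecs(). The LRU figures are converted from pages to kB by shifting by (PAGE_SHIFT - 10): with 4 KiB pages that is a multiply by 4, so, for example, 25,000 active-anon pages are printed as 100000kB.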
- @@ -4854,8 +4898,7 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
- if (should_fail_alloc_page(gfp_mask, order))
- return false;
- - if (IS_ENABLED(CONFIG_CMA) && ac->migratetype == MIGRATE_MOVABLE &&
- - (gfp_mask & __GFP_CMA))
- + if (IS_ENABLED(CONFIG_CMA) && ac->migratetype == MIGRATE_MOVABLE)
- *alloc_flags |= ALLOC_CMA;
- return true;
- @@ -5299,6 +5342,9 @@ long si_mem_available(void)
- reclaimable = global_node_page_state(NR_SLAB_RECLAIMABLE) +
- global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE);
- available += reclaimable - min(reclaimable / 2, wmark_low);
- +#ifdef CONFIG_ION_RBIN_HEAP
- + available += atomic_read(&rbin_cached_pages);
- +#endif
- if (available < 0)
- available = 0;
- @@ -5309,6 +5355,9 @@ EXPORT_SYMBOL_GPL(si_mem_available);
- void si_meminfo(struct sysinfo *val)
- {
- val->totalram = totalram_pages();
- +#ifdef CONFIG_ION_RBIN_HEAP
- + val->totalram += totalrbin_pages;
- +#endif
- val->sharedram = global_node_page_state(NR_SHMEM);
- val->freeram = global_zone_page_state(NR_FREE_PAGES);
- val->bufferram = nr_blockdev_pages();
- @@ -7056,8 +7105,6 @@ void __init free_area_init_node(int nid, unsigned long *zones_size,
- pg_data_t *pgdat = NODE_DATA(nid);
- unsigned long start_pfn = 0;
- unsigned long end_pfn = 0;
- - u64 i;
- - phys_addr_t start, end;
- /* pg_data_t should be reset to zero when it's allocated */
- WARN_ON(pgdat->nr_zones || pgdat->kswapd_classzone_idx);
- @@ -7071,10 +7118,6 @@ void __init free_area_init_node(int nid, unsigned long *zones_size,
- (u64)start_pfn << PAGE_SHIFT,
- end_pfn ? ((u64)end_pfn << PAGE_SHIFT) - 1 : 0);
- #else
- - for_each_mem_range(i, &memblock.memory, NULL, nid, MEMBLOCK_NONE,
- - &start, &end, NULL)
- - subsection_map_init((unsigned long)start >> PAGE_SHIFT,
- - (unsigned long)(end - start) >> PAGE_SHIFT);
- start_pfn = node_start_pfn;
- #endif
- calculate_node_totalpages(pgdat, start_pfn, end_pfn,
- @@ -7692,14 +7735,15 @@ unsigned long free_reserved_area(void *start, void *end, int poison, const char
- free_reserved_page(page);
- }
- - if (pages && s)
- - pr_info("Freeing %s memory: %ldK\n",
- - s, pages << (PAGE_SHIFT - 10));
- + if (pages && s) {
- + pr_info("Freeing %s memory: %ldK\n", s, pages << (PAGE_SHIFT - 10));
- + if (!strcmp(s, "initrd") || !strcmp(s, "unused kernel")) {
- + long size;
- -#ifdef CONFIG_HAVE_MEMBLOCK
- - memblock_dbg("memblock_free: [%#016llx-%#016llx] %pS\n",
- - __pa(start), __pa(end), (void *)_RET_IP_);
- -#endif
- + size = -1 * (long)(pages << PAGE_SHIFT);
- + memblock_memsize_mod_kernel_size(size);
- + }
- + }
- return pages;
- }
- @@ -7978,11 +8022,11 @@ static void __setup_per_zone_wmarks(void)
- mult_frac(zone_managed_pages(zone),
- watermark_scale_factor, 10000));
- - zone->watermark_boost = 0;
- zone->_watermark[WMARK_LOW] = min_wmark_pages(zone) +
- low + tmp;
- zone->_watermark[WMARK_HIGH] = min_wmark_pages(zone) +
- low + tmp * 2;
- + zone->watermark_boost = 0;
- spin_unlock_irqrestore(&zone->lock, flags);
- }
- @@ -8097,22 +8141,6 @@ int watermark_boost_factor_sysctl_handler(struct ctl_table *table, int write,
- return 0;
- }
- -#ifdef CONFIG_MULTIPLE_KSWAPD
- -int kswapd_threads_sysctl_handler(struct ctl_table *table, int write,
- - void __user *buffer, size_t *length, loff_t *ppos)
- -{
- - int rc;
- -
- - rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
- - if (rc)
- - return rc;
- -
- - if (write)
- - update_kswapd_threads();
- -
- - return 0;
- -}
- -#endif
- int watermark_scale_factor_sysctl_handler(struct ctl_table *table, int write,
- void __user *buffer, size_t *length, loff_t *ppos)
- {
- @@ -8522,7 +8550,8 @@ static unsigned long pfn_max_align_up(unsigned long pfn)
- /* [start, end) must belong to a single zone. */
- static int __alloc_contig_migrate_range(struct compact_control *cc,
- - unsigned long start, unsigned long end)
- + unsigned long start, unsigned long end,
- + bool drain)
- {
- /* This function is based on compact_zone() from compaction.c. */
- unsigned long nr_reclaimed;
- @@ -8530,7 +8559,8 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
- unsigned int tries = 0;
- int ret = 0;
- - migrate_prep();
- + if (drain)
- + migrate_prep();
- while (pfn < end || !list_empty(&cc->migratepages)) {
- if (fatal_signal_pending(current)) {
- @@ -8586,8 +8616,8 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
- * pages which PFN is in [start, end) are allocated for the caller and
- * need to be freed with free_contig_range().
- */
- -int alloc_contig_range(unsigned long start, unsigned long end,
- - unsigned migratetype, gfp_t gfp_mask)
- +int __alloc_contig_range(unsigned long start, unsigned long end,
- + unsigned migratetype, gfp_t gfp_mask, bool drain)
- {
- unsigned long outer_start, outer_end;
- unsigned int order;
- @@ -8646,7 +8676,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
- * allocated. So, if we fall through be sure to clear ret so that
- * -EBUSY is not accidentally used or returned to caller.
- */
- - ret = __alloc_contig_migrate_range(&cc, start, end);
- + ret = __alloc_contig_migrate_range(&cc, start, end, drain);
- if (ret && ret != -EBUSY)
- goto done;
- ret = 0;
- @@ -8668,37 +8698,40 @@ int alloc_contig_range(unsigned long start, unsigned long end,
- * isolated thus they won't get removed from buddy.
- */
- - lru_add_drain_all();
- -
- order = 0;
- outer_start = start;
- - while (!PageBuddy(pfn_to_page(outer_start))) {
- - if (++order >= MAX_ORDER) {
- - outer_start = start;
- - break;
- - }
- - outer_start &= ~0UL << order;
- - }
- - if (outer_start != start) {
- - order = page_order(pfn_to_page(outer_start));
- + if (drain) {
- + lru_add_drain_all();
- + drain_all_pages(cc.zone);
- - /*
- - * outer_start page could be small order buddy page and
- - * it doesn't include start page. Adjust outer_start
- - * in this case to report failed page properly
- - * on tracepoint in test_pages_isolated()
- - */
- - if (outer_start + (1UL << order) <= start)
- - outer_start = start;
- - }
- + while (!PageBuddy(pfn_to_page(outer_start))) {
- + if (++order >= MAX_ORDER) {
- + outer_start = start;
- + break;
- + }
- + outer_start &= ~0UL << order;
- + }
- - /* Make sure the range is really isolated. */
- - if (test_pages_isolated(outer_start, end, false)) {
- - pr_info_ratelimited("%s: [%lx, %lx) PFNs busy\n",
- - __func__, outer_start, end);
- - ret = -EBUSY;
- - goto done;
- + if (outer_start != start) {
- + order = page_order(pfn_to_page(outer_start));
- +
- + /*
- + * outer_start page could be small order buddy page and
- + * it doesn't include start page. Adjust outer_start
- + * in this case to report failed page properly
- + * on tracepoint in test_pages_isolated()
- + */
- + if (outer_start + (1UL << order) <= start)
- + outer_start = start;
- + }
- + /* Make sure the range is really isolated. */
- + if (test_pages_isolated(outer_start, end, false)) {
- + pr_info_ratelimited("%s: [%lx, %lx) PFNs busy\n",
- + __func__, outer_start, end);
- + ret = -EBUSY;
- + goto done;
- + }
- }
- /* Grab isolated pages from freelists. */
- @@ -8722,6 +8755,17 @@ done:
- #endif
- return ret;
- }
- +int alloc_contig_range(unsigned long start, unsigned long end,
- + unsigned migratetype, gfp_t gfp_mask)
- +{
- + return __alloc_contig_range(start, end, migratetype, gfp_mask, true);
- +}
- +
- +int alloc_contig_range_fast(unsigned long start, unsigned long end,
- + unsigned migratetype)
- +{
- + return __alloc_contig_range(start, end, migratetype, GFP_KERNEL, false);
- +}
- #endif /* CONFIG_CONTIG_ALLOC */
- void free_contig_range(unsigned long pfn, unsigned int nr_pages)
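The new alloc_contig_range_fast() always uses GFP_KERNEL and passes drain = false, so it skips migrate_prep(), lru_add_drain_all(), drain_all_pages() and the final test_pages_isolated() re-check. That makes it cheaper but more likely to return -EBUSY on a busy range. A hedged usage sketch (the caller and its fallback policy are invented, not part of this patch):

    /* Illustrative caller: the pfn range and fallback are made up. */
    static struct page *grab_contig_block(unsigned long pfn, unsigned int order)
    {
            int ret;

            ret = alloc_contig_range_fast(pfn, pfn + (1UL << order),
                                          MIGRATE_MOVABLE);
            if (ret)        /* retry with the draining, more reliable path */
                    ret = alloc_contig_range(pfn, pfn + (1UL << order),
                                             MIGRATE_MOVABLE, GFP_KERNEL);
            return ret ? NULL : pfn_to_page(pfn);
    }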
- diff --git a/mm/vmscan.c b/mm/vmscan.c
- index 2d57e7eddfeb8..753985b1051aa 100644
- --- a/mm/vmscan.c
- +++ b/mm/vmscan.c
- @@ -89,9 +89,12 @@ struct scan_control {
- unsigned int may_swap:1;
- /*
- - * Cgroups are not reclaimed below their configured memory.low,
- - * unless we threaten to OOM. If any cgroups are skipped due to
- - * memory.low and nothing was reclaimed, go back for memory.low.
- + * Cgroup memory below memory.low is protected as long as we
- + * don't threaten to OOM. If any cgroup is reclaimed at
- + * reduced force or passed over entirely due to its memory.low
- + * setting (memcg_low_skipped), and nothing is reclaimed as a
- + * result, then go back for one more cycle that reclaims the protected
- + * memory (memcg_low_reclaim) to avert OOM.
- */
- unsigned int memcg_low_reclaim:1;
- unsigned int memcg_low_skipped:1;
- @@ -131,21 +134,8 @@ struct scan_control {
- /* for recording the reclaimed slab by now */
- struct reclaim_state reclaim_state;
- - /*
- - * Reclaim pages from a vma. If the page is shared by other tasks
- - * it is zapped from a vma without reclaim so it ends up remaining
- - * on memory until last task zap it.
- - */
- - struct vm_area_struct *target_vma;
- };
- -/*
- - * Number of active kswapd threads
- - */
- -#define DEF_KSWAPD_THREADS_PER_NODE 1
- -int kswapd_threads = DEF_KSWAPD_THREADS_PER_NODE;
- -int kswapd_threads_current = DEF_KSWAPD_THREADS_PER_NODE;
- -
- #ifdef ARCH_HAS_PREFETCH
- #define prefetch_prev_lru_page(_page, _base, _field) \
- do { \
- @@ -485,10 +475,6 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
- long batch_size = shrinker->batch ? shrinker->batch
- : SHRINK_BATCH;
- long scanned = 0, next_deferred;
- - long min_cache_size = batch_size;
- -
- - if (current_is_kswapd())
- - min_cache_size = 0;
- if (!(shrinker->flags & SHRINKER_NUMA_AWARE))
- nid = 0;
- @@ -568,7 +554,7 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
- * scanning at high prio and therefore should try to reclaim as much as
- * possible.
- */
- - while (total_scan > min_cache_size ||
- + while (total_scan >= batch_size ||
- total_scan >= freeable) {
- unsigned long ret;
- unsigned long nr_to_scan = min(batch_size, total_scan);
- @@ -614,6 +600,10 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
- unsigned long ret, freed = 0;
- int i;
- + /* allow shrink_slab_memcg for only kswapd */
- + if (!current_is_kswapd())
- + return 0;
- +
- if (!mem_cgroup_online(memcg))
- return 0;
- @@ -642,8 +632,10 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
- /* Call non-slab shrinkers even though kmem is disabled */
- if (!memcg_kmem_enabled() &&
- - !(shrinker->flags & SHRINKER_NONSLAB))
- + !(shrinker->flags & SHRINKER_NONSLAB)) {
- + clear_bit(i, map->map);
- continue;
- + }
- ret = do_shrink_slab(&sc, shrinker, priority);
- if (ret == SHRINK_EMPTY) {
- @@ -1165,8 +1157,6 @@ static unsigned long shrink_page_list(struct list_head *page_list,
- goto keep;
- VM_BUG_ON_PAGE(PageActive(page), page);
- - if (pgdat)
- - VM_BUG_ON_PAGE(page_pgdat(page) != pgdat, page);
- nr_pages = compound_nr(page);
- @@ -1179,6 +1169,11 @@ static unsigned long shrink_page_list(struct list_head *page_list,
- if (!sc->may_unmap && page_mapped(page))
- goto keep_locked;
- +#ifdef CONFIG_HUGEPAGE_POOL
- + if (PageTransHuge(page))
- + goto keep_locked;
- +#endif
- +
- may_enter_fs = (sc->gfp_mask & __GFP_FS) ||
- (PageSwapCache(page) && (sc->gfp_mask & __GFP_IO));
- @@ -1253,8 +1248,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
- /* Case 1 above */
- if (current_is_kswapd() &&
- PageReclaim(page) &&
- - (pgdat &&
- - test_bit(PGDAT_WRITEBACK, &pgdat->flags))) {
- + test_bit(PGDAT_WRITEBACK, &pgdat->flags)) {
- stat->nr_immediate++;
- goto activate_locked;
- @@ -1326,6 +1320,9 @@ static unsigned long shrink_page_list(struct list_head *page_list,
- if (!add_to_swap(page)) {
- if (!PageTransHuge(page))
- goto activate_locked_split;
- +#ifdef CONFIG_HUGEPAGE_POOL_DEBUG
- + BUG();
- +#endif
- /* Fallback to swap normal pages */
- if (split_huge_page_to_list(page,
- page_list))
- @@ -1366,11 +1363,14 @@ static unsigned long shrink_page_list(struct list_head *page_list,
- */
- if (page_mapped(page)) {
- enum ttu_flags flags = ttu_flags | TTU_BATCH_FLUSH;
- + bool was_swapbacked = PageSwapBacked(page);
- if (unlikely(PageTransHuge(page)))
- flags |= TTU_SPLIT_HUGE_PMD;
- - if (!try_to_unmap(page, flags, sc->target_vma)) {
- + if (!try_to_unmap(page, flags)) {
- stat->nr_unmap_fail += nr_pages;
- + if (!was_swapbacked && PageSwapBacked(page))
- + stat->nr_lazyfree_fail += nr_pages;
- goto activate_locked;
- }
- }
- @@ -1388,8 +1388,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
- */
- if (page_is_file_cache(page) &&
- (!current_is_kswapd() || !PageReclaim(page) ||
- - (pgdat &&
- - !test_bit(PGDAT_DIRTY, &pgdat->flags)))) {
- + !test_bit(PGDAT_DIRTY, &pgdat->flags))) {
- /*
- * Immediately reclaim when written back.
- * Similar in principal to deactivate_page()
- @@ -1513,14 +1512,13 @@ free_it:
- else
- list_add(&page->lru, &free_pages);
- /*
- - * If pagelist are from multiple nodes, we should decrease
- + * If pagelist are from multiple zones, we should decrease
- * NR_ISOLATED_ANON + x on freed pages in here.
- */
- if (!pgdat)
- dec_node_page_state(page, NR_ISOLATED_ANON +
- - page_is_file_cache(page));
- + page_is_file_cache(page));
- continue;
- -
- activate_locked_split:
- /*
- * The tail pages that are failed to add into swap cache
- @@ -1570,7 +1568,7 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
- .may_unmap = 1,
- };
- struct reclaim_stat dummy_stat;
- - unsigned long ret;
- + unsigned long nr_reclaimed;
- struct page *page, *next;
- LIST_HEAD(clean_pages);
- @@ -1582,16 +1580,25 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
- }
- }
- - ret = shrink_page_list(&clean_pages, zone->zone_pgdat, &sc,
- + nr_reclaimed = shrink_page_list(&clean_pages, zone->zone_pgdat, &sc,
- TTU_IGNORE_ACCESS, &dummy_stat, true);
- list_splice(&clean_pages, page_list);
- - mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE, -ret);
- - return ret;
- + mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE, -nr_reclaimed);
- + /*
- + * Since lazyfree pages are isolated from file LRU from the beginning,
- + * they will rotate back to anonymous LRU in the end if it failed to
- + * discard so isolated count will be mismatched.
- + * Compensate the isolated count for both LRU lists.
- + */
- + mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_ANON,
- + dummy_stat.nr_lazyfree_fail);
- + mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE,
- + -dummy_stat.nr_lazyfree_fail);
- +
- + return nr_reclaimed;
- }
- -#ifdef CONFIG_PROCESS_RECLAIM
- -unsigned long reclaim_pages_from_list(struct list_head *page_list,
- - struct vm_area_struct *vma)
- +unsigned long reclaim_pages_from_list(struct list_head *page_list)
- {
- struct scan_control sc = {
- .gfp_mask = GFP_KERNEL,
- @@ -1599,30 +1606,34 @@ unsigned long reclaim_pages_from_list(struct list_head *page_list,
- .may_writepage = 1,
- .may_unmap = 1,
- .may_swap = 1,
- - .target_vma = vma,
- };
- -
- + struct reclaim_stat dummy_stat;
- unsigned long nr_reclaimed;
- - struct reclaim_stat stat;
- - struct page *page;
- + struct page *page, *next;
- + LIST_HEAD(unevictable_pages);
- - list_for_each_entry(page, page_list, lru)
- + list_for_each_entry_safe(page, next, page_list, lru) {
- + if (PageUnevictable(page)) {
- + list_move(&page->lru, &unevictable_pages);
- + continue;
- + }
- ClearPageActive(page);
- + }
- nr_reclaimed = shrink_page_list(page_list, NULL, &sc,
- - TTU_IGNORE_ACCESS, &stat, true);
- + TTU_IGNORE_ACCESS, &dummy_stat, true);
- + list_splice(&unevictable_pages, page_list);
- while (!list_empty(page_list)) {
- page = lru_to_page(page_list);
- list_del(&page->lru);
- dec_node_page_state(page, NR_ISOLATED_ANON +
- - page_is_file_cache(page));
- + page_is_file_cache(page));
- putback_lru_page(page);
- }
- return nr_reclaimed;
- }
- -#endif
- /*
- * Attempt to remove the specified page from its LRU. Only take this page
- @@ -1770,7 +1781,12 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
- nr_pages = compound_nr(page);
- total_scan += nr_pages;
- +#ifdef CONFIG_HUGEPAGE_POOL
- + if (page_zonenum(page) > sc->reclaim_idx
- + || PageTransHuge(page)) {
- +#else
- if (page_zonenum(page) > sc->reclaim_idx) {
- +#endif
- list_move(&page->lru, &pages_skipped);
- nr_skipped[page_zonenum(page)] += nr_pages;
- continue;
- @@ -2026,13 +2042,13 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
- if (stalled)
- return 0;
- - /* We are about to die and free our memory. Return now. */
- - if (fatal_signal_pending(current))
- - return SWAP_CLUSTER_MAX;
- -
- /* wait a bit for the reclaimer. */
- msleep(100);
- stalled = true;
- +
- + /* We are about to die and free our memory. Return now. */
- + if (fatal_signal_pending(current))
- + return SWAP_CLUSTER_MAX;
- }
- lru_add_drain();
- @@ -2355,6 +2371,214 @@ enum scan_balance {
- SCAN_FILE,
- };
- +/* mem_boost throttles only kswapd's behavior */
- +enum mem_boost {
- + NO_BOOST,
- + BOOST_MID = 1,
- + BOOST_HIGH = 2,
- + BOOST_KILL = 3,
- +};
- +static int mem_boost_mode = NO_BOOST;
- +static unsigned long last_mode_change;
- +static bool am_app_launch = false;
- +
- +#define MEM_BOOST_MAX_TIME (5 * HZ) /* 5 sec */
- +
- +#if CONFIG_KSWAPD_CPU
- +static int set_kswapd_cpu_affinity_as_config(void);
- +static int set_kswapd_cpu_affinity_as_boost(void);
- +#endif
- +
- +#ifdef CONFIG_SYSFS
- +static ssize_t mem_boost_mode_show(struct kobject *kobj,
- + struct kobj_attribute *attr, char *buf)
- +{
- + if (mem_boost_mode != NO_BOOST &&
- + time_after(jiffies, last_mode_change + MEM_BOOST_MAX_TIME)) {
- + mem_boost_mode = NO_BOOST;
- +#ifdef CONFIG_KSWAPD_CPU
- + set_kswapd_cpu_affinity_as_config();
- +#endif
- + }
- + return sprintf(buf, "%d\n", mem_boost_mode);
- +}
- +
- +static ssize_t mem_boost_mode_store(struct kobject *kobj,
- + struct kobj_attribute *attr,
- + const char *buf, size_t count)
- +{
- + int mode;
- + int err;
- +
- + err = kstrtoint(buf, 10, &mode);
- + if (err || mode > BOOST_KILL || mode < NO_BOOST)
- + return -EINVAL;
- +
- + mem_boost_mode = mode;
- + last_mode_change = jiffies;
- +#ifdef CONFIG_ION_RBIN_HEAP
- + if (mem_boost_mode >= BOOST_HIGH)
- + wake_ion_rbin_heap_prereclaim();
- +#endif
- +#if CONFIG_KSWAPD_CPU
- + if (mem_boost_mode >= BOOST_HIGH)
- + set_kswapd_cpu_affinity_as_boost();
- + else if (mem_boost_mode == NO_BOOST)
- + set_kswapd_cpu_affinity_as_config();
- +#endif
- + return count;
- +}
- +
- +static inline bool mem_boost_pgdat_wmark(struct pglist_data *pgdat)
- +{
- + int z;
- + struct zone *zone;
- + unsigned long mark;
- +
- + for (z = 0; z < MAX_NR_ZONES; z++) {
- + zone = &pgdat->node_zones[z];
- + if (!managed_zone(zone))
- + continue;
- + mark = low_wmark_pages(zone); //TODO: low, high, or (low + high)/2
- + if (zone_watermark_ok_safe(zone, 0, mark, 0))
- + return true;
- + }
- + return false;
- +}
- +
- +#define MB_TO_PAGES(x) ((x) << (20 - PAGE_SHIFT))
- +#define GB_TO_PAGES(x) ((x) << (30 - PAGE_SHIFT))
- +static unsigned long low_threshold;
- +
- +static inline bool is_too_low_file(struct pglist_data *pgdat)
- +{
- + unsigned long pgdatfile;
- + if (!low_threshold) {
- + if (totalram_pages() > GB_TO_PAGES(2))
- + low_threshold = MB_TO_PAGES(600);
- + else if (totalram_pages() > GB_TO_PAGES(1))
- + low_threshold = MB_TO_PAGES(300);
- + else
- + low_threshold = MB_TO_PAGES(200);
- + }
- +
- + pgdatfile = node_page_state(pgdat, NR_ACTIVE_FILE) +
- + node_page_state(pgdat, NR_INACTIVE_FILE);
- + return pgdatfile < low_threshold;
- +}
- +
- +inline bool need_memory_boosting(struct pglist_data *pgdat)
- +{
- + bool ret;
- +
- + if (mem_boost_mode != NO_BOOST &&
- + (time_after(jiffies, last_mode_change + MEM_BOOST_MAX_TIME) ||
- + is_too_low_file(pgdat))) {
- + mem_boost_mode = NO_BOOST;
- +#if CONFIG_KSWAPD_CPU
- + set_kswapd_cpu_affinity_as_config();
- +#endif
- + }
- +
- + switch (mem_boost_mode) {
- + case BOOST_KILL:
- + case BOOST_HIGH:
- + ret = true;
- + break;
- + case BOOST_MID:
- +#ifndef CONFIG_NEED_MULTIPLE_NODES
- + if (!pgdat)
- + pgdat = &contig_page_data;
- +#endif
- + ret = mem_boost_pgdat_wmark(pgdat) ? false : true;
- + break;
- + case NO_BOOST:
- + default:
- + ret = false;
- + break;
- + }
- + return ret;
- +}
- +
- +ATOMIC_NOTIFIER_HEAD(am_app_launch_notifier);
- +
- +int am_app_launch_notifier_register(struct notifier_block *nb)
- +{
- + return atomic_notifier_chain_register(&am_app_launch_notifier, nb);
- +}
- +
- +int am_app_launch_notifier_unregister(struct notifier_block *nb)
- +{
- + return atomic_notifier_chain_unregister(&am_app_launch_notifier, nb);
- +}
- +
- +static ssize_t am_app_launch_show(struct kobject *kobj,
- + struct kobj_attribute *attr, char *buf)
- +{
- + int ret;
- +
- + ret = am_app_launch ? 1 : 0;
- + return sprintf(buf, "%d\n", ret);
- +}
- +
- +static int notify_app_launch_started(void)
- +{
- + trace_printk("%s\n", "am_app_launch started");
- + atomic_notifier_call_chain(&am_app_launch_notifier, 1, NULL);
- + return 0;
- +}
- +
- +static int notify_app_launch_finished(void)
- +{
- + trace_printk("%s\n", "am_app_launch finished");
- + atomic_notifier_call_chain(&am_app_launch_notifier, 0, NULL);
- + return 0;
- +}
- +
- +static ssize_t am_app_launch_store(struct kobject *kobj,
- + struct kobj_attribute *attr,
- + const char *buf, size_t count)
- +{
- + int mode;
- + int err;
- + bool am_app_launch_new;
- +
- + err = kstrtoint(buf, 10, &mode);
- + if (err || (mode != 0 && mode != 1))
- + return -EINVAL;
- +
- + am_app_launch_new = mode ? true : false;
- + trace_printk("am_app_launch %d -> %d\n", am_app_launch,
- + am_app_launch_new);
- + if (am_app_launch != am_app_launch_new) {
- + if (am_app_launch_new)
- + notify_app_launch_started();
- + else
- + notify_app_launch_finished();
- + }
- + am_app_launch = am_app_launch_new;
- +
- + return count;
- +}
- +
- +#define MEM_BOOST_ATTR(_name) \
- + static struct kobj_attribute _name##_attr = \
- + __ATTR(_name, 0644, _name##_show, _name##_store)
- +MEM_BOOST_ATTR(mem_boost_mode);
- +MEM_BOOST_ATTR(am_app_launch);
- +
- +static struct attribute *vmscan_attrs[] = {
- + &mem_boost_mode_attr.attr,
- + &am_app_launch_attr.attr,
- + NULL,
- +};
- +
- +static struct attribute_group vmscan_attr_group = {
- + .attrs = vmscan_attrs,
- + .name = "vmscan",
- +};
- +#endif
- +
- /*
- * Determine how aggressively the anon and file LRU lists should be
- * scanned. The relative value of each set of LRU lists is determined
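Some concrete numbers for the mem_boost block above, using invented page sizes: with 4 KiB pages, MB_TO_PAGES(600) is 600 << 8 = 153,600 pages and GB_TO_PAGES(2) is 2 << 18 = 524,288 pages, so low_threshold ends up at roughly 600 MB of file pages on systems above 2 GiB of RAM, 300 MB above 1 GiB, and 200 MB otherwise; once file LRU drops below that, or MEM_BOOST_MAX_TIME (5 seconds) elapses, the boost is cleared. The am_app_launch notifier chain lets other kernel code react to launch start/finish events. A hypothetical built-in consumer (the callback name and the extern declaration are assumptions; the registration helpers are not exported in this hunk, so a consumer would need to be built in or the symbols exported):

    #include <linux/init.h>
    #include <linux/notifier.h>
    #include <linux/printk.h>

    extern int am_app_launch_notifier_register(struct notifier_block *nb);

    static int app_launch_event(struct notifier_block *nb,
                                unsigned long action, void *data)
    {
            /* notify_app_launch_started() passes 1, ..._finished() passes 0 */
            pr_info("app launch %s\n", action ? "started" : "finished");
            return NOTIFY_OK;
    }

    static struct notifier_block app_launch_nb = {
            .notifier_call = app_launch_event,
    };

    static int __init app_launch_watch_init(void)
    {
            return am_app_launch_notifier_register(&app_launch_nb);
    }
    late_initcall(app_launch_watch_init);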
- @@ -2449,6 +2673,11 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
- }
- }
- + if (current_is_kswapd() && need_memory_boosting(pgdat)) {
- + scan_balance = SCAN_FILE;
- + goto out;
- + }
- +
- /*
- * If there is enough inactive page cache, i.e. if the size of the
- * inactive list is greater than that of the active list *and* the
- @@ -2522,14 +2751,14 @@ out:
- for_each_evictable_lru(lru) {
- int file = is_file_lru(lru);
- unsigned long lruvec_size;
- + unsigned long low, min;
- unsigned long scan;
- - unsigned long protection;
- lruvec_size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx);
- - protection = mem_cgroup_protection(memcg,
- - sc->memcg_low_reclaim);
- + mem_cgroup_protection(sc->target_mem_cgroup, memcg,
- + &min, &low);
- - if (protection) {
- + if (min || low) {
- /*
- * Scale a cgroup's reclaim pressure by proportioning
- * its current usage to its memory.low or memory.min
- @@ -2560,6 +2789,15 @@ out:
- * hard protection.
- */
- unsigned long cgroup_size = mem_cgroup_size(memcg);
- + unsigned long protection;
- +
- + /* memory.low scaling, make sure we retry before OOM */
- + if (!sc->memcg_low_reclaim && low > min) {
- + protection = low;
- + sc->memcg_low_skipped = 1;
- + } else {
- + protection = min;
- + }
- /* Avoid TOCTOU with earlier protection check */
- cgroup_size = max(cgroup_size, protection);
- @@ -2621,6 +2859,65 @@ out:
- }
- }
- +#ifdef CONFIG_MEMCG_HEIMDALL
- +void forced_shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memcg,
- + int type, unsigned long nr_requested)
- +{
- + struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg);
- + unsigned long nr[NR_LRU_LISTS] = {0,};
- + unsigned long nr_to_scan;
- + enum lru_list lru;
- + unsigned long nr_reclaimed = 0;
- + struct blk_plug plug;
- + unsigned long anon = 0, file = 0;
- + struct scan_control sc = {
- + .nr_to_reclaim = SWAP_CLUSTER_MAX,
- + .gfp_mask = GFP_KERNEL,
- + .reclaim_idx = MAX_NR_ZONES - 1,
- + .target_mem_cgroup = memcg,
- + .priority = DEF_PRIORITY,
- + .may_writepage = !laptop_mode,
- + .may_unmap = 1,
- + .may_swap = 1,
- + };
- +
- + if (type == MEMCG_HEIMDALL_SHRINK_ANON) {
- + anon = lruvec_lru_size(lruvec, LRU_ACTIVE_ANON, MAX_NR_ZONES) +
- + lruvec_lru_size(lruvec, LRU_INACTIVE_ANON, MAX_NR_ZONES);
- + nr[LRU_ACTIVE_ANON] = nr[LRU_INACTIVE_ANON] = anon;
- + nr[LRU_ACTIVE_FILE] = nr[LRU_INACTIVE_FILE] = 0;
- + } else if (type == MEMCG_HEIMDALL_SHRINK_FILE) {
- + file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE, MAX_NR_ZONES) +
- + lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, MAX_NR_ZONES);
- + nr[LRU_ACTIVE_ANON] = nr[LRU_INACTIVE_ANON] = 0;
- + nr[LRU_ACTIVE_FILE] = nr[LRU_INACTIVE_FILE] = file;
- + }
- +
- + trace_printk("%s heimdall start %d %lu %lu %lu\n", __func__, type, nr_requested, anon, file);
- + blk_start_plug(&plug);
- + while (nr[LRU_INACTIVE_ANON] > 0 || nr[LRU_INACTIVE_FILE] > 0) {
- + for_each_evictable_lru(lru) {
- + if (nr[lru]) {
- + nr_to_scan = min(nr[lru], SWAP_CLUSTER_MAX);
- + nr[lru] -= nr_to_scan;
- +
- + nr_reclaimed += shrink_list(lru, nr_to_scan,
- + lruvec, &sc);
- + }
- + }
- +
- + if (nr_reclaimed >= nr_requested)
- + break;
- +
- + cond_resched();
- + }
- + blk_finish_plug(&plug);
- + sc.nr_reclaimed += nr_reclaimed;
- + trace_printk("%s end %d %lu %lu %lu\n", __func__, type, nr_reclaimed,
- + nr[LRU_INACTIVE_ANON], nr[LRU_INACTIVE_FILE]);
- +}
- +#endif
- +
- /*
- * This is a basic per-node page freer. Used by both kswapd and direct reclaim.
- */
- @@ -2731,6 +3028,9 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc
- blk_finish_plug(&plug);
- sc->nr_reclaimed += nr_reclaimed;
- + if (need_memory_boosting(NULL))
- + return;
- +
- /*
- * Even if we did not try to evict anon pages at all, we want to
- * rebalance the anon lru active/inactive ratio.
- @@ -3347,7 +3647,11 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
- .priority = DEF_PRIORITY,
- .may_writepage = !laptop_mode,
- .may_unmap = 1,
- +#ifdef CONFIG_DIRECT_RECLAIM_FILE_PAGES_ONLY
- + .may_swap = 0,
- +#else
- .may_swap = 1,
- +#endif
- };
- /*
- @@ -3954,6 +4258,65 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int alloc_order, int reclaim_o
- finish_wait(&pgdat->kswapd_wait, &wait);
- }
- +#if CONFIG_KSWAPD_CPU
- +static struct cpumask kswapd_cpumask;
- +
- +#define KSWAPD_CPU_BIG 0xF0
- +static struct cpumask kswapd_cpumask_boost;
- +
- +static void init_kswapd_cpumask(void)
- +{
- + int i;
- +
- + cpumask_clear(&kswapd_cpumask);
- + for (i = 0; i < nr_cpu_ids; i++) {
- + if (CONFIG_KSWAPD_CPU & (1 << i))
- + cpumask_set_cpu(i, &kswapd_cpumask);
- + }
- +
- + cpumask_clear(&kswapd_cpumask_boost);
- + for (i = 0; i < nr_cpu_ids; i++) {
- + if (KSWAPD_CPU_BIG & (1 << i))
- + cpumask_set_cpu(i, &kswapd_cpumask_boost);
- + }
- +}
- +
- +/* follow like kswapd_cpu_online(unsigned int cpu) */
- +static int set_kswapd_cpu_affinity_as_config(void)
- +{
- + int nid;
- +
- + for_each_node_state(nid, N_MEMORY) {
- + pg_data_t *pgdat = NODE_DATA(nid);
- + const struct cpumask *mask;
- +
- + mask = &kswapd_cpumask;
- +
- + if (cpumask_any_and(cpu_online_mask, mask) < nr_cpu_ids)
- + /* One of our CPUs online: restore mask */
- + set_cpus_allowed_ptr(pgdat->kswapd, mask);
- + }
- + return 0;
- +}
- +
- +static int set_kswapd_cpu_affinity_as_boost(void)
- +{
- + int nid;
- +
- + for_each_node_state(nid, N_MEMORY) {
- + pg_data_t *pgdat = NODE_DATA(nid);
- + const struct cpumask *mask;
- +
- + mask = &kswapd_cpumask_boost;
- +
- + if (cpumask_any_and(cpu_online_mask, mask) < nr_cpu_ids)
- + /* One of our CPUs online: restore mask */
- + set_cpus_allowed_ptr(pgdat->kswapd, mask);
- + }
- + return 0;
- +}
- +#endif
- +
- /*
- * The background pageout daemon, started as a kernel thread
- * from the init process.
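CONFIG_KSWAPD_CPU is used both as a compile-time switch and as a CPU bitmask; its value is not visible in this diff. As an invented example, if it were 0x0F on an 8-CPU big.LITTLE system, init_kswapd_cpumask() would pin kswapd to CPUs 0-3, while KSWAPD_CPU_BIG = 0xF0 builds a boost mask of CPUs 4-7. Writing 2 (BOOST_HIGH) or 3 (BOOST_KILL) to /sys/kernel/mm/vmscan/mem_boost_mode then migrates kswapd to the big cluster via set_kswapd_cpu_affinity_as_boost() until the boost expires after MEM_BOOST_MAX_TIME (5 seconds) or is explicitly cleared, at which point set_kswapd_cpu_affinity_as_config() restores the configured mask.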
- @@ -3973,7 +4336,11 @@ static int kswapd(void *p)
- unsigned int classzone_idx = MAX_NR_ZONES - 1;
- pg_data_t *pgdat = (pg_data_t*)p;
- struct task_struct *tsk = current;
- +#if CONFIG_KSWAPD_CPU
- + const struct cpumask *cpumask = &kswapd_cpumask;
- +#else
- const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
- +#endif
- if (!cpumask_empty(cpumask))
- set_cpus_allowed_ptr(tsk, cpumask);
- @@ -4133,116 +4500,6 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
- }
- #endif /* CONFIG_HIBERNATION */
- -#ifdef CONFIG_MULTIPLE_KSWAPD
- -static void update_kswapd_threads_node(int nid)
- -{
- - pg_data_t *pgdat;
- - int drop, increase;
- - int last_idx, start_idx, hid;
- - int nr_threads = kswapd_threads_current;
- -
- - pgdat = NODE_DATA(nid);
- - last_idx = nr_threads - 1;
- - if (kswapd_threads < nr_threads) {
- - drop = nr_threads - kswapd_threads;
- - for (hid = last_idx; hid > (last_idx - drop); hid--) {
- - if (pgdat->mkswapd[hid]) {
- - kthread_stop(pgdat->mkswapd[hid]);
- - pgdat->mkswapd[hid] = NULL;
- - }
- - }
- - } else {
- - increase = kswapd_threads - nr_threads;
- - start_idx = last_idx + 1;
- - for (hid = start_idx; hid < (start_idx + increase); hid++) {
- - pgdat->mkswapd[hid] = kthread_run(kswapd, pgdat,
- - "kswapd%d:%d", nid, hid);
- - if (IS_ERR(pgdat->mkswapd[hid])) {
- - pr_err("Failed to start kswapd%d on node %d\n",
- - hid, nid);
- - pgdat->mkswapd[hid] = NULL;
- - /*
- - * We are out of resources. Do not start any
- - * more threads.
- - */
- - break;
- - }
- - }
- - }
- -}
- -
- -void update_kswapd_threads(void)
- -{
- - int nid;
- -
- - if (kswapd_threads_current == kswapd_threads)
- - return;
- -
- - /*
- - * Hold the memory hotplug lock to avoid racing with memory
- - * hotplug initiated updates
- - */
- - mem_hotplug_begin();
- - for_each_node_state(nid, N_MEMORY)
- - update_kswapd_threads_node(nid);
- -
- - pr_info("kswapd_thread count changed, old:%d new:%d\n",
- - kswapd_threads_current, kswapd_threads);
- - kswapd_threads_current = kswapd_threads;
- - mem_hotplug_done();
- -}
- -
- -static int multi_kswapd_run(int nid)
- -{
- - pg_data_t *pgdat = NODE_DATA(nid);
- - int hid, nr_threads = kswapd_threads;
- - int ret = 0;
- -
- - pgdat->mkswapd[0] = pgdat->kswapd;
- - for (hid = 1; hid < nr_threads; ++hid) {
- - pgdat->mkswapd[hid] = kthread_run(kswapd, pgdat, "kswapd%d:%d",
- - nid, hid);
- - if (IS_ERR(pgdat->mkswapd[hid])) {
- - /* failure at boot is fatal */
- - WARN_ON(system_state < SYSTEM_RUNNING);
- - pr_err("Failed to start kswapd%d on node %d\n",
- - hid, nid);
- - ret = PTR_ERR(pgdat->mkswapd[hid]);
- - pgdat->mkswapd[hid] = NULL;
- - }
- - }
- - kswapd_threads_current = nr_threads;
- -
- - return ret;
- -}
- -
- -static void multi_kswapd_stop(int nid)
- -{
- - int hid = 0;
- - int nr_threads = kswapd_threads_current;
- - struct task_struct *kswapd;
- -
- - NODE_DATA(nid)->mkswapd[hid] = NULL;
- - for (hid = 1; hid < nr_threads; hid++) {
- - kswapd = NODE_DATA(nid)->mkswapd[hid];
- - if (kswapd) {
- - kthread_stop(kswapd);
- - NODE_DATA(nid)->mkswapd[hid] = NULL;
- - }
- - }
- -}
- -
- -static void multi_kswapd_cpu_online(pg_data_t *pgdat,
- - const struct cpumask *mask)
- -{
- - int hid;
- - int nr_threads = kswapd_threads_current;
- -
- - for (hid = 1; hid < nr_threads; hid++)
- - set_cpus_allowed_ptr(pgdat->mkswapd[hid], mask);
- -}
- -#endif
- -
- /* It's optimal to keep kswapds on the same CPUs as their memory, but
- not required for correctness. So if the last cpu in a node goes
- away, we get changed to run anywhere: as the first one comes back,
- @@ -4255,13 +4512,15 @@ static int kswapd_cpu_online(unsigned int cpu)
- pg_data_t *pgdat = NODE_DATA(nid);
- const struct cpumask *mask;
- +#if CONFIG_KSWAPD_CPU
- + mask = &kswapd_cpumask;
- +#else
- mask = cpumask_of_node(pgdat->node_id);
- +#endif
- - if (cpumask_any_and(cpu_online_mask, mask) < nr_cpu_ids) {
- + if (cpumask_any_and(cpu_online_mask, mask) < nr_cpu_ids)
- /* One of our CPUs online: restore mask */
- set_cpus_allowed_ptr(pgdat->kswapd, mask);
- - multi_kswapd_cpu_online(pgdat, mask);
- - }
- }
- return 0;
- }
- @@ -4278,17 +4537,14 @@ int kswapd_run(int nid)
- if (pgdat->kswapd)
- return 0;
- - pgdat->kswapd = kthread_run(kswapd, pgdat, "kswapd%d:0", nid);
- + pgdat->kswapd = kthread_run(kswapd, pgdat, "kswapd%d", nid);
- if (IS_ERR(pgdat->kswapd)) {
- /* failure at boot is fatal */
- BUG_ON(system_state < SYSTEM_RUNNING);
- pr_err("Failed to start kswapd on node %d\n", nid);
- ret = PTR_ERR(pgdat->kswapd);
- pgdat->kswapd = NULL;
- - return ret;
- }
- - ret = multi_kswapd_run(nid);
- -
- return ret;
- }
- @@ -4304,14 +4560,15 @@ void kswapd_stop(int nid)
- kthread_stop(kswapd);
- NODE_DATA(nid)->kswapd = NULL;
- }
- -
- - multi_kswapd_stop(nid);
- }
- static int __init kswapd_init(void)
- {
- int nid, ret;
- +#if CONFIG_KSWAPD_CPU
- + init_kswapd_cpumask();
- +#endif
- swap_setup();
- for_each_node_state(nid, N_MEMORY)
- kswapd_run(nid);
- @@ -4319,6 +4576,10 @@ static int __init kswapd_init(void)
- "mm/vmscan:online", kswapd_cpu_online,
- NULL);
- WARN_ON(ret < 0);
- +#ifdef CONFIG_SYSFS
- + if (sysfs_create_group(mm_kobj, &vmscan_attr_group))
- + pr_err("vmscan: register sysfs failed\n");
- +#endif
- return 0;
- }