2008-10-19 10:26:30 +07:00
|
|
|
#ifndef LINUX_MM_INLINE_H
|
|
|
|
#define LINUX_MM_INLINE_H
|
|
|
|
|
|
|
|
/**
|
|
|
|
* page_is_file_cache - should the page be on a file LRU or anon LRU?
|
|
|
|
* @page: the page to test
|
|
|
|
*
|
2008-10-19 10:26:32 +07:00
|
|
|
* Returns LRU_FILE if @page is page cache page backed by a regular filesystem,
|
2008-10-19 10:26:30 +07:00
|
|
|
* or 0 if @page is anonymous, tmpfs or otherwise ram or swap backed.
|
|
|
|
* Used by functions that manipulate the LRU lists, to sort a page
|
|
|
|
* onto the right LRU list.
|
|
|
|
*
|
|
|
|
* We would like to get this info without a page flag, but the state
|
|
|
|
* needs to survive until the page is last deleted from the LRU, which
|
|
|
|
* could be as far down as __page_cache_release.
|
|
|
|
*/
|
|
|
|
static inline int page_is_file_cache(struct page *page)
|
|
|
|
{
|
|
|
|
if (PageSwapBacked(page))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
/* The page is page cache backed by a normal filesystem. */
|
2008-10-19 10:26:32 +07:00
|
|
|
return LRU_FILE;
|
2008-10-19 10:26:30 +07:00
|
|
|
}
|
|
|
|
|
2008-10-19 10:26:14 +07:00
|
|
|
static inline void
|
|
|
|
add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l)
|
|
|
|
{
|
|
|
|
list_add(&page->lru, &zone->lru[l].list);
|
|
|
|
__inc_zone_state(zone, NR_LRU_BASE + l);
|
memcg: synchronized LRU
A big patch for changing memcg's LRU semantics.
Now,
- page_cgroup is linked to mem_cgroup's its own LRU (per zone).
- LRU of page_cgroup is not synchronous with global LRU.
- page and page_cgroup is one-to-one and statically allocated.
- To find page_cgroup is on what LRU, you have to check pc->mem_cgroup as
- lru = page_cgroup_zoneinfo(pc, nid_of_pc, zid_of_pc);
- SwapCache is handled.
And, when we handle LRU list of page_cgroup, we do following.
pc = lookup_page_cgroup(page);
lock_page_cgroup(pc); .....................(1)
mz = page_cgroup_zoneinfo(pc);
spin_lock(&mz->lru_lock);
.....add to LRU
spin_unlock(&mz->lru_lock);
unlock_page_cgroup(pc);
But (1) is spin_lock and we have to be afraid of dead-lock with zone->lru_lock.
So, trylock() is used at (1), now. Without (1), we can't trust "mz" is correct.
This is a trial to remove this dirty nesting of locks.
This patch changes mz->lru_lock to be zone->lru_lock.
Then, above sequence will be written as
spin_lock(&zone->lru_lock); # in vmscan.c or swap.c via global LRU
mem_cgroup_add/remove/etc_lru() {
pc = lookup_page_cgroup(page);
mz = page_cgroup_zoneinfo(pc);
if (PageCgroupUsed(pc)) {
....add to LRU
}
spin_lock(&zone->lru_lock); # in vmscan.c or swap.c via global LRU
This is much simpler.
(*) We're safe even if we don't take lock_page_cgroup(pc). Because..
1. When pc->mem_cgroup can be modified.
- at charge.
- at account_move().
2. at charge
the PCG_USED bit is not set before pc->mem_cgroup is fixed.
3. at account_move()
the page is isolated and not on LRU.
Pros.
- easy for maintenance.
- memcg can make use of laziness of pagevec.
- we don't have to duplicated LRU/Active/Unevictable bit in page_cgroup.
- LRU status of memcg will be synchronized with global LRU's one.
- # of locks are reduced.
- account_move() is simplified very much.
Cons.
- may increase cost of LRU rotation.
(no impact if memcg is not configured.)
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2009-01-08 09:08:01 +07:00
|
|
|
mem_cgroup_add_lru_list(page, l);
|
2008-10-19 10:26:14 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline void
|
|
|
|
del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list l)
|
|
|
|
{
|
|
|
|
list_del(&page->lru);
|
|
|
|
__dec_zone_state(zone, NR_LRU_BASE + l);
|
memcg: synchronized LRU
A big patch for changing memcg's LRU semantics.
Now,
- page_cgroup is linked to mem_cgroup's its own LRU (per zone).
- LRU of page_cgroup is not synchronous with global LRU.
- page and page_cgroup is one-to-one and statically allocated.
- To find page_cgroup is on what LRU, you have to check pc->mem_cgroup as
- lru = page_cgroup_zoneinfo(pc, nid_of_pc, zid_of_pc);
- SwapCache is handled.
And, when we handle LRU list of page_cgroup, we do following.
pc = lookup_page_cgroup(page);
lock_page_cgroup(pc); .....................(1)
mz = page_cgroup_zoneinfo(pc);
spin_lock(&mz->lru_lock);
.....add to LRU
spin_unlock(&mz->lru_lock);
unlock_page_cgroup(pc);
But (1) is spin_lock and we have to be afraid of dead-lock with zone->lru_lock.
So, trylock() is used at (1), now. Without (1), we can't trust "mz" is correct.
This is a trial to remove this dirty nesting of locks.
This patch changes mz->lru_lock to be zone->lru_lock.
Then, above sequence will be written as
spin_lock(&zone->lru_lock); # in vmscan.c or swap.c via global LRU
mem_cgroup_add/remove/etc_lru() {
pc = lookup_page_cgroup(page);
mz = page_cgroup_zoneinfo(pc);
if (PageCgroupUsed(pc)) {
....add to LRU
}
spin_lock(&zone->lru_lock); # in vmscan.c or swap.c via global LRU
This is much simpler.
(*) We're safe even if we don't take lock_page_cgroup(pc). Because..
1. When pc->mem_cgroup can be modified.
- at charge.
- at account_move().
2. at charge
the PCG_USED bit is not set before pc->mem_cgroup is fixed.
3. at account_move()
the page is isolated and not on LRU.
Pros.
- easy for maintenance.
- memcg can make use of laziness of pagevec.
- we don't have to duplicated LRU/Active/Unevictable bit in page_cgroup.
- LRU status of memcg will be synchronized with global LRU's one.
- # of locks are reduced.
- account_move() is simplified very much.
Cons.
- may increase cost of LRU rotation.
(no impact if memcg is not configured.)
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2009-01-08 09:08:01 +07:00
|
|
|
mem_cgroup_del_lru_list(page, l);
|
2008-10-19 10:26:14 +07:00
|
|
|
}
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
static inline void
|
|
|
|
del_page_from_lru(struct zone *zone, struct page *page)
|
|
|
|
{
|
2008-10-19 10:26:32 +07:00
|
|
|
enum lru_list l = LRU_BASE;
|
2008-10-19 10:26:14 +07:00
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
list_del(&page->lru);
|
Unevictable LRU Infrastructure
When the system contains lots of mlocked or otherwise unevictable pages,
the pageout code (kswapd) can spend lots of time scanning over these
pages. Worse still, the presence of lots of unevictable pages can confuse
kswapd into thinking that more aggressive pageout modes are required,
resulting in all kinds of bad behaviour.
Infrastructure to manage pages excluded from reclaim--i.e., hidden from
vmscan. Based on a patch by Larry Woodman of Red Hat. Reworked to
maintain "unevictable" pages on a separate per-zone LRU list, to "hide"
them from vmscan.
Kosaki Motohiro added the support for the memory controller unevictable
lru list.
Pages on the unevictable list have both PG_unevictable and PG_lru set.
Thus, PG_unevictable is analogous to and mutually exclusive with
PG_active--it specifies which LRU list the page is on.
The unevictable infrastructure is enabled by a new mm Kconfig option
[CONFIG_]UNEVICTABLE_LRU.
A new function 'page_evictable(page, vma)' in vmscan.c tests whether or
not a page may be evictable. Subsequent patches will add the various
!evictable tests. We'll want to keep these tests light-weight for use in
shrink_active_list() and, possibly, the fault path.
To avoid races between tasks putting pages [back] onto an LRU list and
tasks that might be moving the page from non-evictable to evictable state,
the new function 'putback_lru_page()' -- inverse to 'isolate_lru_page()'
-- tests the "evictability" of a page after placing it on the LRU, before
dropping the reference. If the page has become unevictable,
putback_lru_page() will redo the 'putback', thus moving the page to the
unevictable list. This way, we avoid "stranding" evictable pages on the
unevictable list.
[akpm@linux-foundation.org: fix fallout from out-of-order merge]
[riel@redhat.com: fix UNEVICTABLE_LRU and !PROC_PAGE_MONITOR build]
[nishimura@mxp.nes.nec.co.jp: remove redundant mapping check]
[kosaki.motohiro@jp.fujitsu.com: unevictable-lru-infrastructure: putback_lru_page()/unevictable page handling rework]
[kosaki.motohiro@jp.fujitsu.com: kill unnecessary lock_page() in vmscan.c]
[kosaki.motohiro@jp.fujitsu.com: revert migration change of unevictable lru infrastructure]
[kosaki.motohiro@jp.fujitsu.com: revert to unevictable-lru-infrastructure-kconfig-fix.patch]
[kosaki.motohiro@jp.fujitsu.com: restore patch failure of vmstat-unevictable-and-mlocked-pages-vm-events.patch]
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Debugged-by: Benjamin Kidwell <benjkidwell@yahoo.com>
Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-10-19 10:26:39 +07:00
|
|
|
if (PageUnevictable(page)) {
|
|
|
|
__ClearPageUnevictable(page);
|
|
|
|
l = LRU_UNEVICTABLE;
|
|
|
|
} else {
|
|
|
|
if (PageActive(page)) {
|
|
|
|
__ClearPageActive(page);
|
|
|
|
l += LRU_ACTIVE;
|
|
|
|
}
|
|
|
|
l += page_is_file_cache(page);
|
2005-04-17 05:20:36 +07:00
|
|
|
}
|
2008-10-19 10:26:14 +07:00
|
|
|
__dec_zone_state(zone, NR_LRU_BASE + l);
|
memcg: synchronized LRU
A big patch for changing memcg's LRU semantics.
Now,
- page_cgroup is linked to mem_cgroup's its own LRU (per zone).
- LRU of page_cgroup is not synchronous with global LRU.
- page and page_cgroup is one-to-one and statically allocated.
- To find page_cgroup is on what LRU, you have to check pc->mem_cgroup as
- lru = page_cgroup_zoneinfo(pc, nid_of_pc, zid_of_pc);
- SwapCache is handled.
And, when we handle LRU list of page_cgroup, we do following.
pc = lookup_page_cgroup(page);
lock_page_cgroup(pc); .....................(1)
mz = page_cgroup_zoneinfo(pc);
spin_lock(&mz->lru_lock);
.....add to LRU
spin_unlock(&mz->lru_lock);
unlock_page_cgroup(pc);
But (1) is spin_lock and we have to be afraid of dead-lock with zone->lru_lock.
So, trylock() is used at (1), now. Without (1), we can't trust "mz" is correct.
This is a trial to remove this dirty nesting of locks.
This patch changes mz->lru_lock to be zone->lru_lock.
Then, above sequence will be written as
spin_lock(&zone->lru_lock); # in vmscan.c or swap.c via global LRU
mem_cgroup_add/remove/etc_lru() {
pc = lookup_page_cgroup(page);
mz = page_cgroup_zoneinfo(pc);
if (PageCgroupUsed(pc)) {
....add to LRU
}
spin_lock(&zone->lru_lock); # in vmscan.c or swap.c via global LRU
This is much simpler.
(*) We're safe even if we don't take lock_page_cgroup(pc). Because..
1. When pc->mem_cgroup can be modified.
- at charge.
- at account_move().
2. at charge
the PCG_USED bit is not set before pc->mem_cgroup is fixed.
3. at account_move()
the page is isolated and not on LRU.
Pros.
- easy for maintenance.
- memcg can make use of laziness of pagevec.
- we don't have to duplicated LRU/Active/Unevictable bit in page_cgroup.
- LRU status of memcg will be synchronized with global LRU's one.
- # of locks are reduced.
- account_move() is simplified very much.
Cons.
- may increase cost of LRU rotation.
(no impact if memcg is not configured.)
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2009-01-08 09:08:01 +07:00
|
|
|
mem_cgroup_del_lru_list(page, l);
|
2005-04-17 05:20:36 +07:00
|
|
|
}
|
2006-01-08 16:00:45 +07:00
|
|
|
|
2008-10-19 10:26:14 +07:00
|
|
|
/**
|
|
|
|
* page_lru - which LRU list should a page be on?
|
|
|
|
* @page: the page to test
|
|
|
|
*
|
|
|
|
* Returns the LRU list a page should be on, as an index
|
|
|
|
* into the array of LRU lists.
|
|
|
|
*/
|
|
|
|
static inline enum lru_list page_lru(struct page *page)
|
|
|
|
{
|
|
|
|
enum lru_list lru = LRU_BASE;
|
|
|
|
|
Unevictable LRU Infrastructure
When the system contains lots of mlocked or otherwise unevictable pages,
the pageout code (kswapd) can spend lots of time scanning over these
pages. Worse still, the presence of lots of unevictable pages can confuse
kswapd into thinking that more aggressive pageout modes are required,
resulting in all kinds of bad behaviour.
Infrastructure to manage pages excluded from reclaim--i.e., hidden from
vmscan. Based on a patch by Larry Woodman of Red Hat. Reworked to
maintain "unevictable" pages on a separate per-zone LRU list, to "hide"
them from vmscan.
Kosaki Motohiro added the support for the memory controller unevictable
lru list.
Pages on the unevictable list have both PG_unevictable and PG_lru set.
Thus, PG_unevictable is analogous to and mutually exclusive with
PG_active--it specifies which LRU list the page is on.
The unevictable infrastructure is enabled by a new mm Kconfig option
[CONFIG_]UNEVICTABLE_LRU.
A new function 'page_evictable(page, vma)' in vmscan.c tests whether or
not a page may be evictable. Subsequent patches will add the various
!evictable tests. We'll want to keep these tests light-weight for use in
shrink_active_list() and, possibly, the fault path.
To avoid races between tasks putting pages [back] onto an LRU list and
tasks that might be moving the page from non-evictable to evictable state,
the new function 'putback_lru_page()' -- inverse to 'isolate_lru_page()'
-- tests the "evictability" of a page after placing it on the LRU, before
dropping the reference. If the page has become unevictable,
putback_lru_page() will redo the 'putback', thus moving the page to the
unevictable list. This way, we avoid "stranding" evictable pages on the
unevictable list.
[akpm@linux-foundation.org: fix fallout from out-of-order merge]
[riel@redhat.com: fix UNEVICTABLE_LRU and !PROC_PAGE_MONITOR build]
[nishimura@mxp.nes.nec.co.jp: remove redundant mapping check]
[kosaki.motohiro@jp.fujitsu.com: unevictable-lru-infrastructure: putback_lru_page()/unevictable page handling rework]
[kosaki.motohiro@jp.fujitsu.com: kill unnecessary lock_page() in vmscan.c]
[kosaki.motohiro@jp.fujitsu.com: revert migration change of unevictable lru infrastructure]
[kosaki.motohiro@jp.fujitsu.com: revert to unevictable-lru-infrastructure-kconfig-fix.patch]
[kosaki.motohiro@jp.fujitsu.com: restore patch failure of vmstat-unevictable-and-mlocked-pages-vm-events.patch]
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Debugged-by: Benjamin Kidwell <benjkidwell@yahoo.com>
Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-10-19 10:26:39 +07:00
|
|
|
if (PageUnevictable(page))
|
|
|
|
lru = LRU_UNEVICTABLE;
|
|
|
|
else {
|
|
|
|
if (PageActive(page))
|
|
|
|
lru += LRU_ACTIVE;
|
|
|
|
lru += page_is_file_cache(page);
|
|
|
|
}
|
2008-10-19 10:26:14 +07:00
|
|
|
|
|
|
|
return lru;
|
|
|
|
}
|
2008-10-19 10:26:30 +07:00
|
|
|
|
2008-10-19 10:26:34 +07:00
|
|
|
/**
|
|
|
|
* inactive_anon_is_low - check if anonymous pages need to be deactivated
|
|
|
|
* @zone: zone to check
|
|
|
|
*
|
|
|
|
* Returns true if the zone does not have enough inactive anon pages,
|
|
|
|
* meaning some active anon pages need to be deactivated.
|
|
|
|
*/
|
|
|
|
static inline int inactive_anon_is_low(struct zone *zone)
|
|
|
|
{
|
|
|
|
unsigned long active, inactive;
|
|
|
|
|
|
|
|
active = zone_page_state(zone, NR_ACTIVE_ANON);
|
|
|
|
inactive = zone_page_state(zone, NR_INACTIVE_ANON);
|
|
|
|
|
|
|
|
if (inactive * zone->inactive_ratio < active)
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
2008-10-19 10:26:30 +07:00
|
|
|
#endif
|