2007-10-16 15:26:11 +07:00
|
|
|
/*
|
|
|
|
* linux/mm/page_isolation.c
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/mm.h>
|
|
|
|
#include <linux/page-isolation.h>
|
|
|
|
#include <linux/pageblock-flags.h>
|
2012-08-01 06:43:50 +07:00
|
|
|
#include <linux/memory.h>
|
2007-10-16 15:26:11 +07:00
|
|
|
#include "internal.h"
|
|
|
|
|
memory-hotplug: fix kswapd looping forever problem
When hotplug offlining happens on zone A, it starts to mark freed pages as
MIGRATE_ISOLATE type in the buddy allocator to prevent further allocation.
(MIGRATE_ISOLATE is a rather ironic type: its pages are apparently in the
buddy allocator, but we can't allocate them.)
When a memory shortage happens during hotplug offlining, the current task
starts to reclaim and then wakes up kswapd. Kswapd checks the watermark and
goes back to sleep, because the current zone_watermark_ok_safe doesn't
consider the MIGRATE_ISOLATE free page count. The current task continues to
reclaim in the direct reclaim path without kswapd's help. The problem is that
zone->all_unreclaimable is set only by kswapd, so the current task would
loop forever as shown below.
__alloc_pages_slowpath
restart:
wake_all_kswapd
rebalance:
__alloc_pages_direct_reclaim
do_try_to_free_pages
if global_reclaim && !all_unreclaimable
return 1; /* It means we did did_some_progress */
skip __alloc_pages_may_oom
should_alloc_retry
goto rebalance;
If we apply KOSAKI's patch[1], which doesn't depend on kswapd for
setting zone->all_unreclaimable, we can solve this problem by killing some
task in the direct reclaim path. But that still doesn't wake up kswapd, which
could still be a problem if another subsystem needs a GFP_ATOMIC request. So
kswapd should consider MIGRATE_ISOLATE when it calculates free pages BEFORE
going to sleep.
This patch counts the number of MIGRATE_ISOLATE page block and
zone_watermark_ok_safe will consider it if the system has such blocks
(fortunately, it's very rare so no problem in POV overhead and kswapd is
never hotpath).
Copy/modify from Mel's quote
"
Ideal solution would be "allocating" the pageblock.
It would keep the free space accounting as it is but historically,
memory hotplug didn't allocate pages because it would be difficult to
detect if a pageblock was isolated or if part of some balloon.
Allocating just full pageblocks would work around this, However,
it would play very badly with CMA.
"
[1] http://lkml.org/lkml/2012/6/14/74
[akpm@linux-foundation.org: simplify nr_zone_isolate_freepages(), rework zone_watermark_ok_safe() comment, simplify set_pageblock_isolate() and restore_pageblock_isolate()]
[akpm@linux-foundation.org: fix CONFIG_MEMORY_ISOLATION=n build]
Signed-off-by: Minchan Kim <minchan@kernel.org>
Suggested-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Tested-by: Aaditya Kumar <aaditya.kumar.30@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2012-08-01 06:43:56 +07:00
|
|
|
/* called while holding zone->lock */
|
|
|
|
static void set_pageblock_isolate(struct page *page)
|
|
|
|
{
|
|
|
|
if (get_pageblock_migratetype(page) == MIGRATE_ISOLATE)
|
|
|
|
return;
|
|
|
|
|
|
|
|
set_pageblock_migratetype(page, MIGRATE_ISOLATE);
|
|
|
|
page_zone(page)->nr_pageblock_isolate++;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* called while holding zone->lock */
|
|
|
|
static void restore_pageblock_isolate(struct page *page, int migratetype)
|
|
|
|
{
|
|
|
|
struct zone *zone = page_zone(page);
|
|
|
|
if (WARN_ON(get_pageblock_migratetype(page) != MIGRATE_ISOLATE))
|
|
|
|
return;
|
|
|
|
|
|
|
|
BUG_ON(zone->nr_pageblock_isolate <= 0);
|
|
|
|
set_pageblock_migratetype(page, migratetype);
|
|
|
|
zone->nr_pageblock_isolate--;
|
|
|
|
}
|
|
|
|
|
2012-08-01 06:43:50 +07:00
|
|
|
int set_migratetype_isolate(struct page *page)
|
|
|
|
{
|
|
|
|
struct zone *zone;
|
|
|
|
unsigned long flags, pfn;
|
|
|
|
struct memory_isolate_notify arg;
|
|
|
|
int notifier_ret;
|
|
|
|
int ret = -EBUSY;
|
|
|
|
|
|
|
|
zone = page_zone(page);
|
|
|
|
|
|
|
|
spin_lock_irqsave(&zone->lock, flags);
|
|
|
|
|
|
|
|
pfn = page_to_pfn(page);
|
|
|
|
arg.start_pfn = pfn;
|
|
|
|
arg.nr_pages = pageblock_nr_pages;
|
|
|
|
arg.pages_found = 0;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* It may be possible to isolate a pageblock even if the
|
|
|
|
* migratetype is not MIGRATE_MOVABLE. The memory isolation
|
|
|
|
* notifier chain is used by balloon drivers to return the
|
|
|
|
* number of pages in a range that are held by the balloon
|
|
|
|
* driver to shrink memory. If all the pages are accounted for
|
|
|
|
* by balloons, are free, or on the LRU, isolation can continue.
|
|
|
|
* Later, for example, when memory hotplug notifier runs, these
|
|
|
|
* pages reported as "can be isolated" should be isolated(freed)
|
|
|
|
* by the balloon driver through the memory notifier chain.
|
|
|
|
*/
|
|
|
|
notifier_ret = memory_isolate_notify(MEM_ISOLATE_COUNT, &arg);
|
|
|
|
notifier_ret = notifier_to_errno(notifier_ret);
|
|
|
|
if (notifier_ret)
|
|
|
|
goto out;
|
|
|
|
/*
|
|
|
|
* FIXME: Now, memory hotplug doesn't call shrink_slab() by itself.
|
|
|
|
* We just check MOVABLE pages.
|
|
|
|
*/
|
|
|
|
if (!has_unmovable_pages(zone, page, arg.pages_found))
|
|
|
|
ret = 0;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* immobile means "not-on-lru" paes. If immobile is larger than
|
|
|
|
* removable-by-driver pages reported by notifier, we'll fail.
|
|
|
|
*/
|
|
|
|
|
|
|
|
out:
|
|
|
|
if (!ret) {
|
2012-10-09 06:32:00 +07:00
|
|
|
unsigned long nr_pages;
|
2012-10-09 06:32:02 +07:00
|
|
|
int migratetype = get_pageblock_migratetype(page);
|
2012-10-09 06:32:00 +07:00
|
|
|
|
memory-hotplug: fix kswapd looping forever problem
When hotplug offlining happens on zone A, it starts to mark freed page as
MIGRATE_ISOLATE type in buddy for preventing further allocation.
(MIGRATE_ISOLATE is very irony type because it's apparently on buddy but
we can't allocate them).
When the memory shortage happens during hotplug offlining, current task
starts to reclaim, then wake up kswapd. Kswapd checks watermark, then go
sleep because current zone_watermark_ok_safe doesn't consider
MIGRATE_ISOLATE freed page count. Current task continue to reclaim in
direct reclaim path without kswapd's helping. The problem is that
zone->all_unreclaimable is set by only kswapd so that current task would
be looping forever like below.
__alloc_pages_slowpath
restart:
wake_all_kswapd
rebalance:
__alloc_pages_direct_reclaim
do_try_to_free_pages
if global_reclaim && !all_unreclaimable
return 1; /* It means we did did_some_progress */
skip __alloc_pages_may_oom
should_alloc_retry
goto rebalance;
If we apply KOSAKI's patch[1] which doesn't depends on kswapd about
setting zone->all_unreclaimable, we can solve this problem by killing some
task in direct reclaim path. But it doesn't wake up kswapd, still. It
could be a problem still if other subsystem needs GFP_ATOMIC request. So
kswapd should consider MIGRATE_ISOLATE when it calculate free pages BEFORE
going sleep.
This patch counts the number of MIGRATE_ISOLATE page block and
zone_watermark_ok_safe will consider it if the system has such blocks
(fortunately, it's very rare so no problem in POV overhead and kswapd is
never hotpath).
Copy/modify from Mel's quote
"
Ideal solution would be "allocating" the pageblock.
It would keep the free space accounting as it is but historically,
memory hotplug didn't allocate pages because it would be difficult to
detect if a pageblock was isolated or if part of some balloon.
Allocating just full pageblocks would work around this, However,
it would play very badly with CMA.
"
[1] http://lkml.org/lkml/2012/6/14/74
[akpm@linux-foundation.org: simplify nr_zone_isolate_freepages(), rework zone_watermark_ok_safe() comment, simplify set_pageblock_isolate() and restore_pageblock_isolate()]
[akpm@linux-foundation.org: fix CONFIG_MEMORY_ISOLATION=n build]
Signed-off-by: Minchan Kim <minchan@kernel.org>
Suggested-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Tested-by: Aaditya Kumar <aaditya.kumar.30@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2012-08-01 06:43:56 +07:00
|
|
|
set_pageblock_isolate(page);
|
2012-10-09 06:32:00 +07:00
|
|
|
nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE);
|
|
|
|
|
2012-10-09 06:32:02 +07:00
|
|
|
__mod_zone_freepage_state(zone, -nr_pages, migratetype);
|
2012-08-01 06:43:50 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
spin_unlock_irqrestore(&zone->lock, flags);
|
|
|
|
if (!ret)
|
|
|
|
drain_all_pages();
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
void unset_migratetype_isolate(struct page *page, unsigned migratetype)
|
|
|
|
{
|
|
|
|
struct zone *zone;
|
2012-10-09 06:32:00 +07:00
|
|
|
unsigned long flags, nr_pages;
|
|
|
|
|
2012-08-01 06:43:50 +07:00
|
|
|
zone = page_zone(page);
|
|
|
|
spin_lock_irqsave(&zone->lock, flags);
|
|
|
|
if (get_pageblock_migratetype(page) != MIGRATE_ISOLATE)
|
|
|
|
goto out;
|
2012-10-09 06:32:00 +07:00
|
|
|
nr_pages = move_freepages_block(zone, page, migratetype);
|
2012-10-09 06:32:02 +07:00
|
|
|
__mod_zone_freepage_state(zone, nr_pages, migratetype);
|
memory-hotplug: fix kswapd looping forever problem
When hotplug offlining happens on zone A, it starts to mark freed page as
MIGRATE_ISOLATE type in buddy for preventing further allocation.
(MIGRATE_ISOLATE is very irony type because it's apparently on buddy but
we can't allocate them).
When the memory shortage happens during hotplug offlining, current task
starts to reclaim, then wake up kswapd. Kswapd checks watermark, then go
sleep because current zone_watermark_ok_safe doesn't consider
MIGRATE_ISOLATE freed page count. Current task continue to reclaim in
direct reclaim path without kswapd's helping. The problem is that
zone->all_unreclaimable is set by only kswapd so that current task would
be looping forever like below.
__alloc_pages_slowpath
restart:
wake_all_kswapd
rebalance:
__alloc_pages_direct_reclaim
do_try_to_free_pages
if global_reclaim && !all_unreclaimable
return 1; /* It means we did did_some_progress */
skip __alloc_pages_may_oom
should_alloc_retry
goto rebalance;
If we apply KOSAKI's patch[1] which doesn't depends on kswapd about
setting zone->all_unreclaimable, we can solve this problem by killing some
task in direct reclaim path. But it doesn't wake up kswapd, still. It
could be a problem still if other subsystem needs GFP_ATOMIC request. So
kswapd should consider MIGRATE_ISOLATE when it calculate free pages BEFORE
going sleep.
This patch counts the number of MIGRATE_ISOLATE page block and
zone_watermark_ok_safe will consider it if the system has such blocks
(fortunately, it's very rare so no problem in POV overhead and kswapd is
never hotpath).
Copy/modify from Mel's quote
"
Ideal solution would be "allocating" the pageblock.
It would keep the free space accounting as it is but historically,
memory hotplug didn't allocate pages because it would be difficult to
detect if a pageblock was isolated or if part of some balloon.
Allocating just full pageblocks would work around this, However,
it would play very badly with CMA.
"
[1] http://lkml.org/lkml/2012/6/14/74
[akpm@linux-foundation.org: simplify nr_zone_isolate_freepages(), rework zone_watermark_ok_safe() comment, simplify set_pageblock_isolate() and restore_pageblock_isolate()]
[akpm@linux-foundation.org: fix CONFIG_MEMORY_ISOLATION=n build]
Signed-off-by: Minchan Kim <minchan@kernel.org>
Suggested-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Tested-by: Aaditya Kumar <aaditya.kumar.30@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2012-08-01 06:43:56 +07:00
|
|
|
restore_pageblock_isolate(page, migratetype);
|
2012-08-01 06:43:50 +07:00
|
|
|
out:
|
|
|
|
spin_unlock_irqrestore(&zone->lock, flags);
|
|
|
|
}
|
|
|
|
|
2007-10-16 15:26:11 +07:00
|
|
|
static inline struct page *
|
|
|
|
__first_valid_page(unsigned long pfn, unsigned long nr_pages)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
for (i = 0; i < nr_pages; i++)
|
|
|
|
if (pfn_valid_within(pfn + i))
|
|
|
|
break;
|
|
|
|
if (unlikely(i == nr_pages))
|
|
|
|
return NULL;
|
|
|
|
return pfn_to_page(pfn + i);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* start_isolate_page_range() -- make page-allocation-type of range of pages
|
|
|
|
* to be MIGRATE_ISOLATE.
|
|
|
|
* @start_pfn: The lower PFN of the range to be isolated.
|
|
|
|
* @end_pfn: The upper PFN of the range to be isolated.
|
2012-04-03 20:06:15 +07:00
|
|
|
* @migratetype: migrate type to set in error recovery.
|
2007-10-16 15:26:11 +07:00
|
|
|
*
|
|
|
|
* Making page-allocation-type to be MIGRATE_ISOLATE means free pages in
|
|
|
|
* the range will never be allocated. Any free pages and pages freed in the
|
|
|
|
* future will not be allocated again.
|
|
|
|
*
|
|
|
|
* start_pfn/end_pfn must be aligned to pageblock_order.
|
|
|
|
* Returns 0 on success and -EBUSY if any part of range cannot be isolated.
|
|
|
|
*/
|
2012-04-03 20:06:15 +07:00
|
|
|
int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
|
|
|
|
unsigned migratetype)
|
2007-10-16 15:26:11 +07:00
|
|
|
{
|
|
|
|
unsigned long pfn;
|
|
|
|
unsigned long undo_pfn;
|
|
|
|
struct page *page;
|
|
|
|
|
|
|
|
BUG_ON((start_pfn) & (pageblock_nr_pages - 1));
|
|
|
|
BUG_ON((end_pfn) & (pageblock_nr_pages - 1));
|
|
|
|
|
|
|
|
for (pfn = start_pfn;
|
|
|
|
pfn < end_pfn;
|
|
|
|
pfn += pageblock_nr_pages) {
|
|
|
|
page = __first_valid_page(pfn, pageblock_nr_pages);
|
|
|
|
if (page && set_migratetype_isolate(page)) {
|
|
|
|
undo_pfn = pfn;
|
|
|
|
goto undo;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
undo:
|
|
|
|
for (pfn = start_pfn;
|
2007-11-15 07:59:12 +07:00
|
|
|
pfn < undo_pfn;
|
2007-10-16 15:26:11 +07:00
|
|
|
pfn += pageblock_nr_pages)
|
2012-04-03 20:06:15 +07:00
|
|
|
unset_migratetype_isolate(pfn_to_page(pfn), migratetype);
|
2007-10-16 15:26:11 +07:00
|
|
|
|
|
|
|
return -EBUSY;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Make isolated pages available again.
|
|
|
|
*/
|
2012-04-03 20:06:15 +07:00
|
|
|
int undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
|
|
|
|
unsigned migratetype)
|
2007-10-16 15:26:11 +07:00
|
|
|
{
|
|
|
|
unsigned long pfn;
|
|
|
|
struct page *page;
|
|
|
|
BUG_ON((start_pfn) & (pageblock_nr_pages - 1));
|
|
|
|
BUG_ON((end_pfn) & (pageblock_nr_pages - 1));
|
|
|
|
for (pfn = start_pfn;
|
|
|
|
pfn < end_pfn;
|
|
|
|
pfn += pageblock_nr_pages) {
|
|
|
|
page = __first_valid_page(pfn, pageblock_nr_pages);
|
2007-11-15 07:59:12 +07:00
|
|
|
if (!page || get_pageblock_migratetype(page) != MIGRATE_ISOLATE)
|
2007-10-16 15:26:11 +07:00
|
|
|
continue;
|
2012-04-03 20:06:15 +07:00
|
|
|
unset_migratetype_isolate(page, migratetype);
|
2007-10-16 15:26:11 +07:00
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* Test all pages in the range is free(means isolated) or not.
|
|
|
|
* all pages in [start_pfn...end_pfn) must be in the same zone.
|
|
|
|
* zone->lock must be held before call this.
|
|
|
|
*
|
2012-04-03 20:06:15 +07:00
|
|
|
* Returns 1 if all pages in the range are isolated.
|
2007-10-16 15:26:11 +07:00
|
|
|
*/
|
|
|
|
static int
|
|
|
|
__test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn)
|
|
|
|
{
|
|
|
|
struct page *page;
|
|
|
|
|
|
|
|
while (pfn < end_pfn) {
|
|
|
|
if (!pfn_valid_within(pfn)) {
|
|
|
|
pfn++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
page = pfn_to_page(pfn);
|
2012-10-09 06:32:14 +07:00
|
|
|
if (PageBuddy(page)) {
|
2012-10-09 06:32:16 +07:00
|
|
|
/*
|
|
|
|
* If race between isolatation and allocation happens,
|
|
|
|
* some free pages could be in MIGRATE_MOVABLE list
|
|
|
|
* although pageblock's migratation type of the page
|
|
|
|
* is MIGRATE_ISOLATE. Catch it and move the page into
|
|
|
|
* MIGRATE_ISOLATE list.
|
|
|
|
*/
|
|
|
|
if (get_freepage_migratetype(page) != MIGRATE_ISOLATE) {
|
|
|
|
struct page *end_page;
|
|
|
|
|
|
|
|
end_page = page + (1 << page_order(page)) - 1;
|
|
|
|
move_freepages(page_zone(page), page, end_page,
|
|
|
|
MIGRATE_ISOLATE);
|
|
|
|
}
|
2007-10-16 15:26:11 +07:00
|
|
|
pfn += 1 << page_order(page);
|
2012-10-09 06:32:14 +07:00
|
|
|
}
|
2007-10-16 15:26:11 +07:00
|
|
|
else if (page_count(page) == 0 &&
|
2012-10-09 06:32:08 +07:00
|
|
|
get_freepage_migratetype(page) == MIGRATE_ISOLATE)
|
2007-10-16 15:26:11 +07:00
|
|
|
pfn += 1;
|
|
|
|
else
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (pfn < end_pfn)
|
|
|
|
return 0;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn)
|
|
|
|
{
|
memory hotplug: missing zone->lock in test_pages_isolated()
__test_page_isolated_in_pageblock() in mm/page_isolation.c has a comment
saying that the caller must hold zone->lock. But the only caller of that
function, test_pages_isolated(), does not hold zone->lock and the lock is
also not acquired anywhere before. This patch adds the missing zone->lock
to test_pages_isolated().
We reproducibly run into BUG_ON(!PageBuddy(page)) in __offline_isolated_pages()
during memory hotplug stress test, see trace below. This patch fixes that
problem, it would be good if we could have it in 2.6.27.
kernel BUG at /home/autobuild/BUILD/linux-2.6.26-20080909/mm/page_alloc.c:4561!
illegal operation: 0001 [#1] PREEMPT SMP
Modules linked in: dm_multipath sunrpc bonding qeth_l3 dm_mod qeth ccwgroup vmur
CPU: 1 Not tainted 2.6.26-29.x.20080909-s390default #1
Process memory_loop_all (pid: 10025, task: 2f444028, ksp: 2b10dd28)
Krnl PSW : 040c0000 801727ea (__offline_isolated_pages+0x18e/0x1c4)
R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:0 CC:0 PM:0
Krnl GPRS: 00000000 7e27fc00 00000000 7e27fc00
00000000 00000400 00014000 7e27fc01
00606f00 7e27fc00 00013fe0 2b10dd28
00000005 80172662 801727b2 2b10dd28
Krnl Code: 801727de: 5810900c l %r1,12(%r9)
801727e2: a7f4ffb3 brc 15,80172748
801727e6: a7f40001 brc 15,801727e8
>801727ea: a7f4ffbc brc 15,80172762
801727ee: a7f40001 brc 15,801727f0
801727f2: a7f4ffaf brc 15,80172750
801727f6: 0707 bcr 0,%r7
801727f8: 0017 unknown
Call Trace:
([<0000000000172772>] __offline_isolated_pages+0x116/0x1c4)
[<00000000001953a2>] offline_isolated_pages_cb+0x22/0x34
[<000000000013164c>] walk_memory_resource+0xcc/0x11c
[<000000000019520e>] offline_pages+0x36a/0x498
[<00000000001004d6>] remove_memory+0x36/0x44
[<000000000028fb06>] memory_block_change_state+0x112/0x150
[<000000000028ffb8>] store_mem_state+0x90/0xe4
[<0000000000289c00>] sysdev_store+0x34/0x40
[<00000000001ee048>] sysfs_write_file+0xd0/0x178
[<000000000019b1a8>] vfs_write+0x74/0x118
[<000000000019b9ae>] sys_write+0x46/0x7c
[<000000000011160e>] sysc_do_restart+0x12/0x16
[<0000000077f3e8ca>] 0x77f3e8ca
Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-10-03 04:50:16 +07:00
|
|
|
unsigned long pfn, flags;
|
2007-10-16 15:26:11 +07:00
|
|
|
struct page *page;
|
memory hotplug: missing zone->lock in test_pages_isolated()
__test_page_isolated_in_pageblock() in mm/page_isolation.c has a comment
saying that the caller must hold zone->lock. But the only caller of that
function, test_pages_isolated(), does not hold zone->lock and the lock is
also not acquired anywhere before. This patch adds the missing zone->lock
to test_pages_isolated().
We reproducibly run into BUG_ON(!PageBuddy(page)) in __offline_isolated_pages()
during memory hotplug stress test, see trace below. This patch fixes that
problem, it would be good if we could have it in 2.6.27.
kernel BUG at /home/autobuild/BUILD/linux-2.6.26-20080909/mm/page_alloc.c:4561!
illegal operation: 0001 [#1] PREEMPT SMP
Modules linked in: dm_multipath sunrpc bonding qeth_l3 dm_mod qeth ccwgroup vmur
CPU: 1 Not tainted 2.6.26-29.x.20080909-s390default #1
Process memory_loop_all (pid: 10025, task: 2f444028, ksp: 2b10dd28)
Krnl PSW : 040c0000 801727ea (__offline_isolated_pages+0x18e/0x1c4)
R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:0 CC:0 PM:0
Krnl GPRS: 00000000 7e27fc00 00000000 7e27fc00
00000000 00000400 00014000 7e27fc01
00606f00 7e27fc00 00013fe0 2b10dd28
00000005 80172662 801727b2 2b10dd28
Krnl Code: 801727de: 5810900c l %r1,12(%r9)
801727e2: a7f4ffb3 brc 15,80172748
801727e6: a7f40001 brc 15,801727e8
>801727ea: a7f4ffbc brc 15,80172762
801727ee: a7f40001 brc 15,801727f0
801727f2: a7f4ffaf brc 15,80172750
801727f6: 0707 bcr 0,%r7
801727f8: 0017 unknown
Call Trace:
([<0000000000172772>] __offline_isolated_pages+0x116/0x1c4)
[<00000000001953a2>] offline_isolated_pages_cb+0x22/0x34
[<000000000013164c>] walk_memory_resource+0xcc/0x11c
[<000000000019520e>] offline_pages+0x36a/0x498
[<00000000001004d6>] remove_memory+0x36/0x44
[<000000000028fb06>] memory_block_change_state+0x112/0x150
[<000000000028ffb8>] store_mem_state+0x90/0xe4
[<0000000000289c00>] sysdev_store+0x34/0x40
[<00000000001ee048>] sysfs_write_file+0xd0/0x178
[<000000000019b1a8>] vfs_write+0x74/0x118
[<000000000019b9ae>] sys_write+0x46/0x7c
[<000000000011160e>] sysc_do_restart+0x12/0x16
[<0000000077f3e8ca>] 0x77f3e8ca
Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-10-03 04:50:16 +07:00
|
|
|
struct zone *zone;
|
|
|
|
int ret;
|
2007-10-16 15:26:11 +07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Note: pageblock_nr_page != MAX_ORDER. Then, chunks of free page
|
|
|
|
* is not aligned to pageblock_nr_pages.
|
|
|
|
* Then we just check pagetype fist.
|
|
|
|
*/
|
|
|
|
for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
|
|
|
|
page = __first_valid_page(pfn, pageblock_nr_pages);
|
2007-11-15 07:59:12 +07:00
|
|
|
if (page && get_pageblock_migratetype(page) != MIGRATE_ISOLATE)
|
2007-10-16 15:26:11 +07:00
|
|
|
break;
|
|
|
|
}
|
2008-11-07 03:53:36 +07:00
|
|
|
page = __first_valid_page(start_pfn, end_pfn - start_pfn);
|
|
|
|
if ((pfn < end_pfn) || !page)
|
2007-10-16 15:26:11 +07:00
|
|
|
return -EBUSY;
|
|
|
|
/* Check all pages are free or Marked as ISOLATED */
|
2008-11-07 03:53:36 +07:00
|
|
|
zone = page_zone(page);
|
memory hotplug: missing zone->lock in test_pages_isolated()
__test_page_isolated_in_pageblock() in mm/page_isolation.c has a comment
saying that the caller must hold zone->lock. But the only caller of that
function, test_pages_isolated(), does not hold zone->lock and the lock is
also not acquired anywhere before. This patch adds the missing zone->lock
to test_pages_isolated().
We reproducibly run into BUG_ON(!PageBuddy(page)) in __offline_isolated_pages()
during memory hotplug stress test, see trace below. This patch fixes that
problem, it would be good if we could have it in 2.6.27.
kernel BUG at /home/autobuild/BUILD/linux-2.6.26-20080909/mm/page_alloc.c:4561!
illegal operation: 0001 [#1] PREEMPT SMP
Modules linked in: dm_multipath sunrpc bonding qeth_l3 dm_mod qeth ccwgroup vmur
CPU: 1 Not tainted 2.6.26-29.x.20080909-s390default #1
Process memory_loop_all (pid: 10025, task: 2f444028, ksp: 2b10dd28)
Krnl PSW : 040c0000 801727ea (__offline_isolated_pages+0x18e/0x1c4)
R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:0 CC:0 PM:0
Krnl GPRS: 00000000 7e27fc00 00000000 7e27fc00
00000000 00000400 00014000 7e27fc01
00606f00 7e27fc00 00013fe0 2b10dd28
00000005 80172662 801727b2 2b10dd28
Krnl Code: 801727de: 5810900c l %r1,12(%r9)
801727e2: a7f4ffb3 brc 15,80172748
801727e6: a7f40001 brc 15,801727e8
>801727ea: a7f4ffbc brc 15,80172762
801727ee: a7f40001 brc 15,801727f0
801727f2: a7f4ffaf brc 15,80172750
801727f6: 0707 bcr 0,%r7
801727f8: 0017 unknown
Call Trace:
([<0000000000172772>] __offline_isolated_pages+0x116/0x1c4)
[<00000000001953a2>] offline_isolated_pages_cb+0x22/0x34
[<000000000013164c>] walk_memory_resource+0xcc/0x11c
[<000000000019520e>] offline_pages+0x36a/0x498
[<00000000001004d6>] remove_memory+0x36/0x44
[<000000000028fb06>] memory_block_change_state+0x112/0x150
[<000000000028ffb8>] store_mem_state+0x90/0xe4
[<0000000000289c00>] sysdev_store+0x34/0x40
[<00000000001ee048>] sysfs_write_file+0xd0/0x178
[<000000000019b1a8>] vfs_write+0x74/0x118
[<000000000019b9ae>] sys_write+0x46/0x7c
[<000000000011160e>] sysc_do_restart+0x12/0x16
[<0000000077f3e8ca>] 0x77f3e8ca
Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-10-03 04:50:16 +07:00
|
|
|
spin_lock_irqsave(&zone->lock, flags);
|
|
|
|
ret = __test_page_isolated_in_pageblock(start_pfn, end_pfn);
|
|
|
|
spin_unlock_irqrestore(&zone->lock, flags);
|
|
|
|
return ret ? 0 : -EBUSY;
|
2007-10-16 15:26:11 +07:00
|
|
|
}
|
2012-10-09 06:32:52 +07:00
|
|
|
|
|
|
|
struct page *alloc_migrate_target(struct page *page, unsigned long private,
|
|
|
|
int **resultp)
|
|
|
|
{
|
|
|
|
gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE;
|
|
|
|
|
|
|
|
if (PageHighMem(page))
|
|
|
|
gfp_mask |= __GFP_HIGHMEM;
|
|
|
|
|
|
|
|
return alloc_page(gfp_mask);
|
|
|
|
}
|