mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-11-24 09:30:52 +07:00
[PATCH] Direct Migration V9: upgrade MPOL_MF_MOVE and sys_migrate_pages()
Modify policy layer to support direct page migration - Add migrate_pages_to() allowing the migration of a list of pages to a specified node or to vma with a specific allocation policy in sets of MIGRATE_CHUNK_SIZE pages - Modify do_migrate_pages() to do a staged move of pages from the source nodes to the target nodes. Signed-off-by: Paul Jackson <pj@sgi.com> Signed-off-by: Christoph Lameter <clameter@sgi.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
parent
a3351e525e
commit
7e2ab150d1
167
mm/mempolicy.c
167
mm/mempolicy.c
@ -95,6 +95,9 @@
|
||||
#define MPOL_MF_INVERT (MPOL_MF_INTERNAL << 1) /* Invert check for nodemask */
|
||||
#define MPOL_MF_STATS (MPOL_MF_INTERNAL << 2) /* Gather statistics */
|
||||
|
||||
/* The number of pages to migrate per call to migrate_pages() */
|
||||
#define MIGRATE_CHUNK_SIZE 256
|
||||
|
||||
static kmem_cache_t *policy_cache;
|
||||
static kmem_cache_t *sn_cache;
|
||||
|
||||
@ -543,24 +546,91 @@ static void migrate_page_add(struct page *page, struct list_head *pagelist,
|
||||
}
|
||||
}
|
||||
|
||||
static int swap_pages(struct list_head *pagelist)
|
||||
/*
|
||||
* Migrate the list 'pagelist' of pages to a certain destination.
|
||||
*
|
||||
* Specify destination with either non-NULL vma or dest_node >= 0
|
||||
* Return the number of pages not migrated or error code
|
||||
*/
|
||||
static int migrate_pages_to(struct list_head *pagelist,
|
||||
struct vm_area_struct *vma, int dest)
|
||||
{
|
||||
LIST_HEAD(newlist);
|
||||
LIST_HEAD(moved);
|
||||
LIST_HEAD(failed);
|
||||
int n;
|
||||
int err = 0;
|
||||
int nr_pages;
|
||||
struct page *page;
|
||||
struct list_head *p;
|
||||
|
||||
n = migrate_pages(pagelist, NULL, &moved, &failed);
|
||||
putback_lru_pages(&failed);
|
||||
putback_lru_pages(&moved);
|
||||
redo:
|
||||
nr_pages = 0;
|
||||
list_for_each(p, pagelist) {
|
||||
if (vma)
|
||||
page = alloc_page_vma(GFP_HIGHUSER, vma, vma->vm_start);
|
||||
else
|
||||
page = alloc_pages_node(dest, GFP_HIGHUSER, 0);
|
||||
|
||||
return n;
|
||||
if (!page) {
|
||||
err = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
list_add(&page->lru, &newlist);
|
||||
nr_pages++;
|
||||
if (nr_pages > MIGRATE_CHUNK_SIZE);
|
||||
break;
|
||||
}
|
||||
err = migrate_pages(pagelist, &newlist, &moved, &failed);
|
||||
|
||||
putback_lru_pages(&moved); /* Call release pages instead ?? */
|
||||
|
||||
if (err >= 0 && list_empty(&newlist) && !list_empty(pagelist))
|
||||
goto redo;
|
||||
out:
|
||||
/* Return leftover allocated pages */
|
||||
while (!list_empty(&newlist)) {
|
||||
page = list_entry(newlist.next, struct page, lru);
|
||||
list_del(&page->lru);
|
||||
__free_page(page);
|
||||
}
|
||||
list_splice(&failed, pagelist);
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
/* Calculate number of leftover pages */
|
||||
nr_pages = 0;
|
||||
list_for_each(p, pagelist)
|
||||
nr_pages++;
|
||||
return nr_pages;
|
||||
}
|
||||
|
||||
/*
|
||||
* For now migrate_pages simply swaps out the pages from nodes that are in
|
||||
* the source set but not in the target set. In the future, we would
|
||||
* want a function that moves pages between the two nodesets in such
|
||||
* a way as to preserve the physical layout as much as possible.
|
||||
* Migrate pages from one node to a target node.
|
||||
* Returns error or the number of pages not migrated.
|
||||
*/
|
||||
int migrate_to_node(struct mm_struct *mm, int source, int dest, int flags)
|
||||
{
|
||||
nodemask_t nmask;
|
||||
LIST_HEAD(pagelist);
|
||||
int err = 0;
|
||||
|
||||
nodes_clear(nmask);
|
||||
node_set(source, nmask);
|
||||
|
||||
check_range(mm, mm->mmap->vm_start, TASK_SIZE, &nmask,
|
||||
flags | MPOL_MF_DISCONTIG_OK, &pagelist);
|
||||
|
||||
if (!list_empty(&pagelist)) {
|
||||
err = migrate_pages_to(&pagelist, NULL, dest);
|
||||
if (!list_empty(&pagelist))
|
||||
putback_lru_pages(&pagelist);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* Move pages between the two nodesets so as to preserve the physical
|
||||
* layout as much as possible.
|
||||
*
|
||||
* Returns the number of page that could not be moved.
|
||||
*/
|
||||
@ -568,22 +638,76 @@ int do_migrate_pages(struct mm_struct *mm,
|
||||
const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags)
|
||||
{
|
||||
LIST_HEAD(pagelist);
|
||||
int count = 0;
|
||||
nodemask_t nodes;
|
||||
int busy = 0;
|
||||
int err = 0;
|
||||
nodemask_t tmp;
|
||||
|
||||
nodes_andnot(nodes, *from_nodes, *to_nodes);
|
||||
down_read(&mm->mmap_sem);
|
||||
|
||||
down_read(&mm->mmap_sem);
|
||||
check_range(mm, mm->mmap->vm_start, TASK_SIZE, &nodes,
|
||||
flags | MPOL_MF_DISCONTIG_OK, &pagelist);
|
||||
/*
|
||||
* Find a 'source' bit set in 'tmp' whose corresponding 'dest'
|
||||
* bit in 'to' is not also set in 'tmp'. Clear the found 'source'
|
||||
* bit in 'tmp', and return that <source, dest> pair for migration.
|
||||
* The pair of nodemasks 'to' and 'from' define the map.
|
||||
*
|
||||
* If no pair of bits is found that way, fallback to picking some
|
||||
* pair of 'source' and 'dest' bits that are not the same. If the
|
||||
* 'source' and 'dest' bits are the same, this represents a node
|
||||
* that will be migrating to itself, so no pages need move.
|
||||
*
|
||||
* If no bits are left in 'tmp', or if all remaining bits left
|
||||
* in 'tmp' correspond to the same bit in 'to', return false
|
||||
* (nothing left to migrate).
|
||||
*
|
||||
* This lets us pick a pair of nodes to migrate between, such that
|
||||
* if possible the dest node is not already occupied by some other
|
||||
* source node, minimizing the risk of overloading the memory on a
|
||||
* node that would happen if we migrated incoming memory to a node
|
||||
* before migrating outgoing memory source that same node.
|
||||
*
|
||||
* A single scan of tmp is sufficient. As we go, we remember the
|
||||
* most recent <s, d> pair that moved (s != d). If we find a pair
|
||||
* that not only moved, but what's better, moved to an empty slot
|
||||
* (d is not set in tmp), then we break out then, with that pair.
|
||||
* Otherwise when we finish scannng from_tmp, we at least have the
|
||||
* most recent <s, d> pair that moved. If we get all the way through
|
||||
* the scan of tmp without finding any node that moved, much less
|
||||
* moved to an empty node, then there is nothing left worth migrating.
|
||||
*/
|
||||
|
||||
if (!list_empty(&pagelist)) {
|
||||
count = swap_pages(&pagelist);
|
||||
putback_lru_pages(&pagelist);
|
||||
tmp = *from_nodes;
|
||||
while (!nodes_empty(tmp)) {
|
||||
int s,d;
|
||||
int source = -1;
|
||||
int dest = 0;
|
||||
|
||||
for_each_node_mask(s, tmp) {
|
||||
d = node_remap(s, *from_nodes, *to_nodes);
|
||||
if (s == d)
|
||||
continue;
|
||||
|
||||
source = s; /* Node moved. Memorize */
|
||||
dest = d;
|
||||
|
||||
/* dest not in remaining from nodes? */
|
||||
if (!node_isset(dest, tmp))
|
||||
break;
|
||||
}
|
||||
if (source == -1)
|
||||
break;
|
||||
|
||||
node_clear(source, tmp);
|
||||
err = migrate_to_node(mm, source, dest, flags);
|
||||
if (err > 0)
|
||||
busy += err;
|
||||
if (err < 0)
|
||||
break;
|
||||
}
|
||||
|
||||
up_read(&mm->mmap_sem);
|
||||
return count;
|
||||
if (err < 0)
|
||||
return err;
|
||||
return busy;
|
||||
}
|
||||
|
||||
long do_mbind(unsigned long start, unsigned long len,
|
||||
@ -643,8 +767,9 @@ long do_mbind(unsigned long start, unsigned long len,
|
||||
int nr_failed = 0;
|
||||
|
||||
err = mbind_range(vma, start, end, new);
|
||||
|
||||
if (!list_empty(&pagelist))
|
||||
nr_failed = swap_pages(&pagelist);
|
||||
nr_failed = migrate_pages_to(&pagelist, vma, -1);
|
||||
|
||||
if (!err && nr_failed && (flags & MPOL_MF_STRICT))
|
||||
err = -EIO;
|
||||
|
Loading…
Reference in New Issue
Block a user