
Commit be97a41

tehcaster authored and torvalds committed
mm/mempolicy.c: merge alloc_hugepage_vma to alloc_pages_vma
The previous commit ("mm/thp: Allocate transparent hugepages on local node") introduced alloc_hugepage_vma() to mm/mempolicy.c to perform a special policy for THP allocations. The function has the same interface as alloc_pages_vma(), and shares a lot of boilerplate code and a long comment with it.

This patch merges the hugepage special case into alloc_pages_vma(). The extra if condition should be a cheap enough price to pay. We also prevent a (however unlikely) race with a parallel mems_allowed update, which could previously make a hugepage allocation restart only within the fallback call to alloc_pages_vma() and not reconsider the special rule in alloc_hugepage_vma().

By making sure mpol_cond_put(pol) is always called before the actual allocation attempt, we can also use a single exit path within the function.

Also update the kernel-doc comment for the missing node parameter and the obsolete reference to mm_sem.

Signed-off-by: Vlastimil Babka <[email protected]>
Cc: Aneesh Kumar K.V <[email protected]>
Cc: Kirill A. Shutemov <[email protected]>
Cc: Vlastimil Babka <[email protected]>
Cc: David Rientjes <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
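In outline, every allocation branch of the merged function now drops the policy reference and funnels through a single exit, so one cpuset retry check covers all cases. A condensed skeleton (the full post-patch alloc_pages_vma() appears in the mm/mempolicy.c diff below):

    retry_cpuset:
            pol = get_vma_policy(vma, addr);
            cpuset_mems_cookie = read_mems_allowed_begin();

            /* hugepage special case, interleave case and generic case all
             * call mpol_cond_put(pol), attempt their allocation, and then
             * jump here instead of returning directly */
            ...
    out:
            if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
                    goto retry_cpuset;
            return page;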
1 parent 077fcf1 commit be97a41

File tree

2 files changed: +39 −91 lines

include/linux/gfp.h

+6-6
@@ -334,22 +334,22 @@ alloc_pages(gfp_t gfp_mask, unsigned int order)
 }
 extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
                         struct vm_area_struct *vma, unsigned long addr,
-                        int node);
-extern struct page *alloc_hugepage_vma(gfp_t gfp, struct vm_area_struct *vma,
-                        unsigned long addr, int order);
+                        int node, bool hugepage);
+#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
+        alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true)
 #else
 #define alloc_pages(gfp_mask, order) \
                 alloc_pages_node(numa_node_id(), gfp_mask, order)
-#define alloc_pages_vma(gfp_mask, order, vma, addr, node) \
+#define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\
         alloc_pages(gfp_mask, order)
 #define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
         alloc_pages(gfp_mask, order)
 #endif
 #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
 #define alloc_page_vma(gfp_mask, vma, addr) \
-        alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id())
+        alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false)
 #define alloc_page_vma_node(gfp_mask, vma, addr, node) \
-        alloc_pages_vma(gfp_mask, 0, vma, addr, node)
+        alloc_pages_vma(gfp_mask, 0, vma, addr, node, false)

 extern struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order);
 extern struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask,
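For illustration, with CONFIG_NUMA enabled a THP-sized allocation through the new macro now boils down to a plain alloc_pages_vma() call with hugepage=true. The caller context here is hypothetical; GFP_TRANSHUGE and HPAGE_PMD_ORDER are the usual THP gfp mask and order:

    /* hypothetical THP fault-path caller */
    page = alloc_hugepage_vma(GFP_TRANSHUGE, vma, haddr, HPAGE_PMD_ORDER);

    /* ...which the CONFIG_NUMA macro above expands to: */
    page = alloc_pages_vma(GFP_TRANSHUGE, HPAGE_PMD_ORDER, vma, haddr,
                           numa_node_id(), true);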

mm/mempolicy.c

+33-85
@@ -1988,120 +1988,68 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  * @order:Order of the GFP allocation.
  * @vma:  Pointer to VMA or NULL if not available.
  * @addr: Virtual Address of the allocation. Must be inside the VMA.
+ * @node: Which node to prefer for allocation (modulo policy).
+ * @hugepage: for hugepages try only the preferred node if possible
  *
  * This function allocates a page from the kernel page pool and applies
  * a NUMA policy associated with the VMA or the current process.
  * When VMA is not NULL caller must hold down_read on the mmap_sem of the
  * mm_struct of the VMA to prevent it from going away. Should be used for
- * all allocations for pages that will be mapped into
- * user space. Returns NULL when no page can be allocated.
- *
- * Should be called with the mm_sem of the vma hold.
+ * all allocations for pages that will be mapped into user space. Returns
+ * NULL when no page can be allocated.
  */
 struct page *
 alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
-                unsigned long addr, int node)
+                unsigned long addr, int node, bool hugepage)
 {
         struct mempolicy *pol;
         struct page *page;
         unsigned int cpuset_mems_cookie;
+        struct zonelist *zl;
+        nodemask_t *nmask;

 retry_cpuset:
         pol = get_vma_policy(vma, addr);
         cpuset_mems_cookie = read_mems_allowed_begin();

-        if (unlikely(pol->mode == MPOL_INTERLEAVE)) {
+        if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage &&
+                                        pol->mode != MPOL_INTERLEAVE)) {
+                /*
+                 * For hugepage allocation and non-interleave policy which
+                 * allows the current node, we only try to allocate from the
+                 * current node and don't fall back to other nodes, as the
+                 * cost of remote accesses would likely offset THP benefits.
+                 *
+                 * If the policy is interleave, or does not allow the current
+                 * node in its nodemask, we allocate the standard way.
+                 */
+                nmask = policy_nodemask(gfp, pol);
+                if (!nmask || node_isset(node, *nmask)) {
+                        mpol_cond_put(pol);
+                        page = alloc_pages_exact_node(node, gfp, order);
+                        goto out;
+                }
+        }
+
+        if (pol->mode == MPOL_INTERLEAVE) {
                 unsigned nid;

                 nid = interleave_nid(pol, vma, addr, PAGE_SHIFT + order);
                 mpol_cond_put(pol);
                 page = alloc_page_interleave(gfp, order, nid);
-                if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
-                        goto retry_cpuset;
-
-                return page;
+                goto out;
         }
-        page = __alloc_pages_nodemask(gfp, order,
-                                      policy_zonelist(gfp, pol, node),
-                                      policy_nodemask(gfp, pol));
+
+        nmask = policy_nodemask(gfp, pol);
+        zl = policy_zonelist(gfp, pol, node);
         mpol_cond_put(pol);
+        page = __alloc_pages_nodemask(gfp, order, zl, nmask);
+out:
         if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
                 goto retry_cpuset;
         return page;
 }

-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-/**
- * alloc_hugepage_vma: Allocate a hugepage for a VMA
- * @gfp:
- *   %GFP_USER    user allocation.
- *   %GFP_KERNEL  kernel allocations,
- *   %GFP_HIGHMEM highmem/user allocations,
- *   %GFP_FS      allocation should not call back into a file system.
- *   %GFP_ATOMIC  don't sleep.
- *
- * @vma:   Pointer to VMA or NULL if not available.
- * @addr:  Virtual Address of the allocation. Must be inside the VMA.
- * @order: Order of the hugepage for gfp allocation.
- *
- * This functions allocate a huge page from the kernel page pool and applies
- * a NUMA policy associated with the VMA or the current process.
- * For policy other than %MPOL_INTERLEAVE, we make sure we allocate hugepage
- * only from the current node if the current node is part of the node mask.
- * If we can't allocate a hugepage we fail the allocation and don' try to fallback
- * to other nodes in the node mask. If the current node is not part of node mask
- * or if the NUMA policy is MPOL_INTERLEAVE we use the allocator that can
- * fallback to nodes in the policy node mask.
- *
- * When VMA is not NULL caller must hold down_read on the mmap_sem of the
- * mm_struct of the VMA to prevent it from going away. Should be used for
- * all allocations for pages that will be mapped into
- * user space. Returns NULL when no page can be allocated.
- *
- * Should be called with vma->vm_mm->mmap_sem held.
- *
- */
-struct page *alloc_hugepage_vma(gfp_t gfp, struct vm_area_struct *vma,
-                                unsigned long addr, int order)
-{
-        struct page *page;
-        nodemask_t *nmask;
-        struct mempolicy *pol;
-        int node = numa_node_id();
-        unsigned int cpuset_mems_cookie;
-
-retry_cpuset:
-        pol = get_vma_policy(vma, addr);
-        cpuset_mems_cookie = read_mems_allowed_begin();
-        /*
-         * For interleave policy, we don't worry about
-         * current node. Otherwise if current node is
-         * in nodemask, try to allocate hugepage from
-         * the current node. Don't fall back to other nodes
-         * for THP.
-         */
-        if (unlikely(pol->mode == MPOL_INTERLEAVE))
-                goto alloc_with_fallback;
-        nmask = policy_nodemask(gfp, pol);
-        if (!nmask || node_isset(node, *nmask)) {
-                mpol_cond_put(pol);
-                page = alloc_pages_exact_node(node, gfp, order);
-                if (unlikely(!page &&
-                             read_mems_allowed_retry(cpuset_mems_cookie)))
-                        goto retry_cpuset;
-                return page;
-        }
-alloc_with_fallback:
-        mpol_cond_put(pol);
-        /*
-         * if current node is not part of node mask, try
-         * the allocation from any node, and we can do retry
-         * in that case.
-         */
-        return alloc_pages_vma(gfp, order, vma, addr, node);
-}
-#endif
-
 /**
  * alloc_pages_current - Allocate pages.
  *
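The retry_cpuset/out pattern above is a sequence-count read loop: snapshot the mems_allowed generation, attempt the allocation, and redo the whole policy walk only if the attempt failed while the nodemask changed underneath. As a rough user-space analogue (purely illustrative: read_begin(), read_retry() and try_alloc() are invented names here, and the kernel's read_mems_allowed_begin()/read_mems_allowed_retry() are built on a seqcount with proper barriers):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stddef.h>

    /* generation counter a hypothetical writer bumps on each update */
    static atomic_uint mems_seq;

    static unsigned int read_begin(void)
    {
            return atomic_load(&mems_seq);
    }

    static bool read_retry(unsigned int cookie)
    {
            return atomic_load(&mems_seq) != cookie;
    }

    /* mirror of the retry_cpuset/out flow: only a failed attempt that
     * raced with a writer is retried, from the very beginning */
    void *alloc_with_retry(void *(*try_alloc)(void))
    {
            void *page;
            unsigned int cookie;
    retry:
            cookie = read_begin();
            page = try_alloc();
            if (page == NULL && read_retry(cookie))
                    goto retry;
            return page;
    }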
