Skip to content

Commit

Permalink
Memory controller: make charging gfp mask aware
Browse files Browse the repository at this point in the history
Nick Piggin pointed out that swap cache and page cache addition routines
could be called from non GFP_KERNEL contexts.  This patch makes the
charging routine aware of the gfp context.  Charging might fail if the
cgroup is over its limit, in which case a suitable error is returned.

This patch was tested on a Powerpc box.  I am still looking at being able
to test the path, through which allocations happen in non GFP_KERNEL
contexts.

[[email protected]: problem with ZONE_MOVABLE]
Signed-off-by: Balbir Singh <[email protected]>
Cc: Pavel Emelianov <[email protected]>
Cc: Paul Menage <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: "Eric W. Biederman" <[email protected]>
Cc: Nick Piggin <[email protected]>
Cc: Kirill Korotaev <[email protected]>
Cc: Herbert Poetzl <[email protected]>
Cc: David Rientjes <[email protected]>
Cc: Vaidyanathan Srinivasan <[email protected]>
Signed-off-by: KAMEZAWA Hiroyuki <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
Balbir Singh authored and Linus Torvalds committed Feb 7, 2008
1 parent bed7161 commit e1a1cd5
Show file tree
Hide file tree
Showing 9 changed files with 41 additions and 30 deletions.
12 changes: 8 additions & 4 deletions include/linux/memcontrol.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ extern void mm_free_cgroup(struct mm_struct *mm);
extern void page_assign_page_cgroup(struct page *page,
struct page_cgroup *pc);
extern struct page_cgroup *page_get_page_cgroup(struct page *page);
extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm);
extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask);
extern void mem_cgroup_uncharge(struct page_cgroup *pc);
extern void mem_cgroup_move_lists(struct page_cgroup *pc, bool active);
extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
Expand All @@ -42,7 +43,8 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
struct mem_cgroup *mem_cont,
int active);
extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask);
extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm);
extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask);
extern struct mem_cgroup *mm_cgroup(struct mm_struct *mm);

static inline void mem_cgroup_uncharge_page(struct page *page)
Expand Down Expand Up @@ -70,7 +72,8 @@ static inline struct page_cgroup *page_get_page_cgroup(struct page *page)
return NULL;
}

static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm)
static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask)
{
return 0;
}
Expand All @@ -89,7 +92,8 @@ static inline void mem_cgroup_move_lists(struct page_cgroup *pc,
}

static inline int mem_cgroup_cache_charge(struct page *page,
struct mm_struct *mm)
struct mm_struct *mm,
gfp_t gfp_mask)
{
return 0;
}
Expand Down
3 changes: 2 additions & 1 deletion include/linux/swap.h
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,8 @@ extern void swap_setup(void);
/* linux/mm/vmscan.c */
extern unsigned long try_to_free_pages(struct zone **zones, int order,
gfp_t gfp_mask);
extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem);
extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem,
gfp_t gfp_mask);
extern int __isolate_lru_page(struct page *page, int mode);
extern unsigned long shrink_all_memory(unsigned long nr_pages);
extern int vm_swappiness;
Expand Down
2 changes: 1 addition & 1 deletion mm/filemap.c
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,7 @@ int add_to_page_cache(struct page *page, struct address_space *mapping,

if (error == 0) {

error = mem_cgroup_cache_charge(page, current->mm);
error = mem_cgroup_cache_charge(page, current->mm, gfp_mask);
if (error)
goto out;

Expand Down
24 changes: 17 additions & 7 deletions mm/memcontrol.c
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,8 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
* 0 if the charge was successful
* < 0 if the cgroup is over its limit
*/
int mem_cgroup_charge(struct page *page, struct mm_struct *mm)
int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask)
{
struct mem_cgroup *mem;
struct page_cgroup *pc, *race_pc;
Expand Down Expand Up @@ -293,7 +294,7 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm)

unlock_page_cgroup(page);

pc = kzalloc(sizeof(struct page_cgroup), GFP_KERNEL);
pc = kzalloc(sizeof(struct page_cgroup), gfp_mask);
if (pc == NULL)
goto err;

Expand All @@ -320,7 +321,14 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm)
* the cgroup limit.
*/
while (res_counter_charge(&mem->res, PAGE_SIZE)) {
if (try_to_free_mem_cgroup_pages(mem))
bool is_atomic = gfp_mask & GFP_ATOMIC;
/*
* We cannot reclaim under GFP_ATOMIC, fail the charge
*/
if (is_atomic)
goto noreclaim;

if (try_to_free_mem_cgroup_pages(mem, gfp_mask))
continue;

/*
Expand All @@ -344,9 +352,10 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm)
congestion_wait(WRITE, HZ/10);
continue;
}

noreclaim:
css_put(&mem->css);
mem_cgroup_out_of_memory(mem, GFP_KERNEL);
if (!is_atomic)
mem_cgroup_out_of_memory(mem, GFP_KERNEL);
goto free_pc;
}

Expand Down Expand Up @@ -385,15 +394,16 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm)
/*
* See if the cached pages should be charged at all?
*/
int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm)
int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask)
{
struct mem_cgroup *mem;
if (!mm)
mm = &init_mm;

mem = rcu_dereference(mm->mem_cgroup);
if (mem->control_type == MEM_CGROUP_TYPE_ALL)
return mem_cgroup_charge(page, mm);
return mem_cgroup_charge(page, mm, gfp_mask);
else
return 0;
}
Expand Down
10 changes: 5 additions & 5 deletions mm/memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -1147,7 +1147,7 @@ static int insert_page(struct mm_struct *mm, unsigned long addr, struct page *pa
pte_t *pte;
spinlock_t *ptl;

retval = mem_cgroup_charge(page, mm);
retval = mem_cgroup_charge(page, mm, GFP_KERNEL);
if (retval)
goto out;

Expand Down Expand Up @@ -1650,7 +1650,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
cow_user_page(new_page, old_page, address, vma);
__SetPageUptodate(new_page);

if (mem_cgroup_charge(new_page, mm))
if (mem_cgroup_charge(new_page, mm, GFP_KERNEL))
goto oom_free_new;

/*
Expand Down Expand Up @@ -2052,7 +2052,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
count_vm_event(PGMAJFAULT);
}

if (mem_cgroup_charge(page, mm)) {
if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
ret = VM_FAULT_OOM;
goto out;
Expand Down Expand Up @@ -2139,7 +2139,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
goto oom;
__SetPageUptodate(page);

if (mem_cgroup_charge(page, mm))
if (mem_cgroup_charge(page, mm, GFP_KERNEL))
goto oom_free_page;

entry = mk_pte(page, vma->vm_page_prot);
Expand Down Expand Up @@ -2277,7 +2277,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,

}

if (mem_cgroup_charge(page, mm)) {
if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
ret = VM_FAULT_OOM;
goto out;
}
Expand Down
2 changes: 1 addition & 1 deletion mm/migrate.c
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ static void remove_migration_pte(struct vm_area_struct *vma,
return;
}

if (mem_cgroup_charge(new, mm)) {
if (mem_cgroup_charge(new, mm, GFP_KERNEL)) {
pte_unmap(ptep);
return;
}
Expand Down
2 changes: 1 addition & 1 deletion mm/swap_state.c
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
error = radix_tree_preload(gfp_mask);
if (!error) {

error = mem_cgroup_cache_charge(page, current->mm);
error = mem_cgroup_cache_charge(page, current->mm, gfp_mask);
if (error)
goto out;

Expand Down
2 changes: 1 addition & 1 deletion mm/swapfile.c
Original file line number Diff line number Diff line change
Expand Up @@ -510,7 +510,7 @@ unsigned int count_swap_pages(int type, int free)
static int unuse_pte(struct vm_area_struct *vma, pte_t *pte,
unsigned long addr, swp_entry_t entry, struct page *page)
{
if (mem_cgroup_charge(page, vma->vm_mm))
if (mem_cgroup_charge(page, vma->vm_mm, GFP_KERNEL))
return -ENOMEM;

inc_mm_counter(vma->vm_mm, anon_rss);
Expand Down
14 changes: 5 additions & 9 deletions mm/vmscan.c
Original file line number Diff line number Diff line change
Expand Up @@ -1337,16 +1337,11 @@ unsigned long try_to_free_pages(struct zone **zones, int order, gfp_t gfp_mask)

#ifdef CONFIG_CGROUP_MEM_CONT

#ifdef CONFIG_HIGHMEM
#define ZONE_USERPAGES ZONE_HIGHMEM
#else
#define ZONE_USERPAGES ZONE_NORMAL
#endif

unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont)
unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
gfp_t gfp_mask)
{
struct scan_control sc = {
.gfp_mask = GFP_KERNEL,
.gfp_mask = gfp_mask,
.may_writepage = !laptop_mode,
.may_swap = 1,
.swap_cluster_max = SWAP_CLUSTER_MAX,
Expand All @@ -1357,9 +1352,10 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont)
};
int node;
struct zone **zones;
int target_zone = gfp_zone(GFP_HIGHUSER_MOVABLE);

for_each_online_node(node) {
zones = NODE_DATA(node)->node_zonelists[ZONE_USERPAGES].zones;
zones = NODE_DATA(node)->node_zonelists[target_zone].zones;
if (do_try_to_free_pages(zones, sc.gfp_mask, &sc))
return 1;
}
Expand Down

0 comments on commit e1a1cd5

Please sign in to comment.