 867578cbcc
			
		
	
	
	867578cbcc
	
	
	
		
			
			In current page-fault code,
	handle_mm_fault()
		-> ...
		-> mem_cgroup_charge()
		-> map page or handle error.
	-> check return code.
If page fault's return code is VM_FAULT_OOM, page_fault_out_of_memory() is
called.  But if it's caused by memcg, OOM should have been already
invoked.
Then, I added a patch: a636b327f7.  That
patch records last_oom_jiffies for memcg's sub-hierarchy and prevents
page_fault_out_of_memory from being invoked in near future.
But Nishimura-san reported that the jiffies-based check is not enough when
the system is under very heavy load.
This patch changes memcg's oom logic as follows:
 * If memcg causes OOM-kill, continue to retry.
 * Remove the jiffies check which is used now.
 * Add memcg-oom-lock, which works like the per-zone oom lock.
 * If current is killed (as a process), bypass charge.
Something more sophisticated can be added, but this patch does the
fundamental things.
TODO:
 - add oom notifier
 - add per-memcg disable-oom-kill flag and freezer at oom.
 - more chances for wake up oom waiter (when changing memory limit etc..)
Reviewed-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Tested-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
		
	
			
		
			
				
	
	
		
			312 lines
		
	
	
	
		
			8.2 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			312 lines
		
	
	
	
		
			8.2 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
| /* memcontrol.h - Memory Controller
 | |
|  *
 | |
|  * Copyright IBM Corporation, 2007
 | |
|  * Author Balbir Singh <balbir@linux.vnet.ibm.com>
 | |
|  *
 | |
|  * Copyright 2007 OpenVZ SWsoft Inc
 | |
|  * Author: Pavel Emelianov <xemul@openvz.org>
 | |
|  *
 | |
|  * This program is free software; you can redistribute it and/or modify
 | |
|  * it under the terms of the GNU General Public License as published by
 | |
|  * the Free Software Foundation; either version 2 of the License, or
 | |
|  * (at your option) any later version.
 | |
|  *
 | |
|  * This program is distributed in the hope that it will be useful,
 | |
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | |
|  * GNU General Public License for more details.
 | |
|  */
 | |
| 
 | |
| #ifndef _LINUX_MEMCONTROL_H
 | |
| #define _LINUX_MEMCONTROL_H
 | |
| #include <linux/cgroup.h>
 | |
| struct mem_cgroup;
 | |
| struct page_cgroup;
 | |
| struct page;
 | |
| struct mm_struct;
 | |
| 
 | |
| #ifdef CONFIG_CGROUP_MEM_RES_CTLR
 | |
| /*
 | |
|  * All "charge" functions with gfp_mask should use GFP_KERNEL or
 | |
|  * (gfp_mask & GFP_RECLAIM_MASK). In current implementation, memcg doesn't
 | |
|  * alloc memory but reclaims memory from all available zones. So, "where I want
 | |
|  * memory from" bits of gfp_mask has no meaning. So any bits of that field is
 | |
|  * available but adding a rule is better. charge functions' gfp_mask should
 | |
|  * be set to GFP_KERNEL or gfp_mask & GFP_RECLAIM_MASK for avoiding ambiguous
 | |
|  * codes.
 | |
|  * (Of course, if memcg does memory allocation in future, GFP_KERNEL is sane.)
 | |
|  */
 | |
| 
 | |
| extern int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm,
 | |
| 				gfp_t gfp_mask);
 | |
| /* for swap handling */
 | |
| extern int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
 | |
| 		struct page *page, gfp_t mask, struct mem_cgroup **ptr);
 | |
| extern void mem_cgroup_commit_charge_swapin(struct page *page,
 | |
| 					struct mem_cgroup *ptr);
 | |
| extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr);
 | |
| 
 | |
| extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 | |
| 					gfp_t gfp_mask);
 | |
| extern void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru);
 | |
| extern void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru);
 | |
| extern void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru);
 | |
| extern void mem_cgroup_del_lru(struct page *page);
 | |
| extern void mem_cgroup_move_lists(struct page *page,
 | |
| 				  enum lru_list from, enum lru_list to);
 | |
| 
 | |
| /* For coalescing uncharges to reduce memcg's overhead */
 | |
| extern void mem_cgroup_uncharge_start(void);
 | |
| extern void mem_cgroup_uncharge_end(void);
 | |
| 
 | |
| extern void mem_cgroup_uncharge_page(struct page *page);
 | |
| extern void mem_cgroup_uncharge_cache_page(struct page *page);
 | |
| extern int mem_cgroup_shmem_charge_fallback(struct page *page,
 | |
| 			struct mm_struct *mm, gfp_t gfp_mask);
 | |
| 
 | |
| extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 | |
| 					struct list_head *dst,
 | |
| 					unsigned long *scanned, int order,
 | |
| 					int mode, struct zone *z,
 | |
| 					struct mem_cgroup *mem_cont,
 | |
| 					int active, int file);
 | |
| extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask);
 | |
| int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem);
 | |
| 
 | |
| extern struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page);
 | |
| extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
 | |
| 
 | |
| static inline
 | |
| int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup)
 | |
| {
 | |
| 	struct mem_cgroup *mem;
 | |
| 	rcu_read_lock();
 | |
| 	mem = mem_cgroup_from_task(rcu_dereference((mm)->owner));
 | |
| 	rcu_read_unlock();
 | |
| 	return cgroup == mem;
 | |
| }
 | |
| 
 | |
| extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *mem);
 | |
| 
 | |
| extern int
 | |
| mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr);
 | |
| extern void mem_cgroup_end_migration(struct mem_cgroup *mem,
 | |
| 	struct page *oldpage, struct page *newpage);
 | |
| 
 | |
| /*
 | |
|  * For memory reclaim.
 | |
|  */
 | |
| extern int mem_cgroup_get_reclaim_priority(struct mem_cgroup *mem);
 | |
| extern void mem_cgroup_note_reclaim_priority(struct mem_cgroup *mem,
 | |
| 							int priority);
 | |
| extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem,
 | |
| 							int priority);
 | |
| int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg);
 | |
| int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg);
 | |
| unsigned long mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg,
 | |
| 				       struct zone *zone,
 | |
| 				       enum lru_list lru);
 | |
| struct zone_reclaim_stat *mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg,
 | |
| 						      struct zone *zone);
 | |
| struct zone_reclaim_stat*
 | |
| mem_cgroup_get_reclaim_stat_from_page(struct page *page);
 | |
| extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
 | |
| 					struct task_struct *p);
 | |
| 
 | |
| #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
 | |
| extern int do_swap_account;
 | |
| #endif
 | |
| 
 | |
| static inline bool mem_cgroup_disabled(void)
 | |
| {
 | |
| 	if (mem_cgroup_subsys.disabled)
 | |
| 		return true;
 | |
| 	return false;
 | |
| }
 | |
| 
 | |
| void mem_cgroup_update_file_mapped(struct page *page, int val);
 | |
| unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
 | |
| 						gfp_t gfp_mask, int nid,
 | |
| 						int zid);
 | |
| #else /* CONFIG_CGROUP_MEM_RES_CTLR */
 | |
| struct mem_cgroup;
 | |
| 
 | |
| static inline int mem_cgroup_newpage_charge(struct page *page,
 | |
| 					struct mm_struct *mm, gfp_t gfp_mask)
 | |
| {
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
| static inline int mem_cgroup_cache_charge(struct page *page,
 | |
| 					struct mm_struct *mm, gfp_t gfp_mask)
 | |
| {
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
| static inline int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
 | |
| 		struct page *page, gfp_t gfp_mask, struct mem_cgroup **ptr)
 | |
| {
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
/* Stub for builds without CONFIG_CGROUP_MEM_RES_CTLR: does nothing. */
static inline void
mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr)
{
}
 | |
| 
 | |
/* Stub for builds without CONFIG_CGROUP_MEM_RES_CTLR: does nothing. */
static inline void
mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr)
{
}
 | |
| 
 | |
/* Stub for builds without CONFIG_CGROUP_MEM_RES_CTLR: does nothing. */
static inline void
mem_cgroup_uncharge_start(void)
{
}
 | |
| 
 | |
/* Stub for builds without CONFIG_CGROUP_MEM_RES_CTLR: does nothing. */
static inline void
mem_cgroup_uncharge_end(void)
{
}
 | |
| 
 | |
/* Stub for builds without CONFIG_CGROUP_MEM_RES_CTLR: does nothing. */
static inline void
mem_cgroup_uncharge_page(struct page *page)
{
}
 | |
| 
 | |
/* Stub for builds without CONFIG_CGROUP_MEM_RES_CTLR: does nothing. */
static inline void
mem_cgroup_uncharge_cache_page(struct page *page)
{
}
 | |
| 
 | |
| static inline int mem_cgroup_shmem_charge_fallback(struct page *page,
 | |
| 			struct mm_struct *mm, gfp_t gfp_mask)
 | |
| {
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
/* Stub for builds without CONFIG_CGROUP_MEM_RES_CTLR: does nothing. */
static inline void
mem_cgroup_add_lru_list(struct page *page, int lru)
{
}
 | |
| 
 | |
/* Stub for builds without CONFIG_CGROUP_MEM_RES_CTLR: does nothing. */
static inline void
mem_cgroup_del_lru_list(struct page *page, int lru)
{
}
 | |
| 
 | |
/* Stub for builds without CONFIG_CGROUP_MEM_RES_CTLR: does nothing. */
static inline void
mem_cgroup_rotate_lru_list(struct page *page, int lru)
{
}
 | |
| 
 | |
/* Stub for builds without CONFIG_CGROUP_MEM_RES_CTLR: does nothing. */
static inline void
mem_cgroup_del_lru(struct page *page)
{
}
 | |
| 
 | |
| static inline void
 | |
| mem_cgroup_move_lists(struct page *page, enum lru_list from, enum lru_list to)
 | |
| {
 | |
| }
 | |
| 
 | |
/*
 * Stub for builds without CONFIG_CGROUP_MEM_RES_CTLR.
 * Ignores @page and always returns NULL.
 */
static inline struct mem_cgroup *
try_get_mem_cgroup_from_page(struct page *page)
{
	return NULL;
}
 | |
| 
 | |
/*
 * Stub for builds without CONFIG_CGROUP_MEM_RES_CTLR: every mm is reported
 * as matching, so this always returns 1.
 *
 * Both pointers are const-qualified, as in the CONFIG_CGROUP_MEM_RES_CTLR
 * variant of this function, so callers that hold const pointers build
 * without discarded-qualifier warnings in either configuration.
 */
static inline int mm_match_cgroup(const struct mm_struct *mm,
				  const struct mem_cgroup *mem)
{
	return 1;
}
 | |
| 
 | |
/*
 * Stub for builds without CONFIG_CGROUP_MEM_RES_CTLR.
 * Ignores both arguments and always returns 1.
 */
static inline int
task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem)
{
	return 1;
}
 | |
| 
 | |
/*
 * Stub for builds without CONFIG_CGROUP_MEM_RES_CTLR.
 * Ignores @mem and always returns NULL.
 */
static inline struct cgroup_subsys_state *
mem_cgroup_css(struct mem_cgroup *mem)
{
	return NULL;
}
 | |
| 
 | |
/*
 * Stub for builds without CONFIG_CGROUP_MEM_RES_CTLR.
 * Always returns 0; @ptr is left untouched.
 */
static inline int mem_cgroup_prepare_migration(struct page *page,
					       struct mem_cgroup **ptr)
{
	return 0;
}
 | |
| 
 | |
/* Stub for builds without CONFIG_CGROUP_MEM_RES_CTLR: does nothing. */
static inline void
mem_cgroup_end_migration(struct mem_cgroup *mem, struct page *oldpage,
			 struct page *newpage)
{
}
 | |
| 
 | |
/*
 * Stub for builds without CONFIG_CGROUP_MEM_RES_CTLR.
 * Ignores @mem and always returns 0.
 */
static inline int
mem_cgroup_get_reclaim_priority(struct mem_cgroup *mem)
{
	return 0;
}
 | |
| 
 | |
/* Stub for builds without CONFIG_CGROUP_MEM_RES_CTLR: does nothing. */
static inline void
mem_cgroup_note_reclaim_priority(struct mem_cgroup *mem, int priority)
{
}
 | |
| 
 | |
/* Stub for builds without CONFIG_CGROUP_MEM_RES_CTLR: does nothing. */
static inline void
mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, int priority)
{
}
 | |
| 
 | |
/*
 * Stub for builds without CONFIG_CGROUP_MEM_RES_CTLR: the controller is
 * not compiled in, so this always returns true.
 */
static inline bool
mem_cgroup_disabled(void)
{
	return true;
}
 | |
| 
 | |
/*
 * Stub for builds without CONFIG_CGROUP_MEM_RES_CTLR.
 * Ignores @memcg and always returns 1.
 */
static inline int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg)
{
	return 1;
}
 | |
| 
 | |
/*
 * Stub for builds without CONFIG_CGROUP_MEM_RES_CTLR.
 * Ignores @memcg and always returns 1.
 */
static inline int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg)
{
	return 1;
}
 | |
| 
 | |
| static inline unsigned long
 | |
| mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg, struct zone *zone,
 | |
| 			 enum lru_list lru)
 | |
| {
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
| 
 | |
/*
 * Stub for builds without CONFIG_CGROUP_MEM_RES_CTLR.
 * Ignores both arguments and always returns NULL.
 */
static inline struct zone_reclaim_stat *
mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg, struct zone *zone)
{
	return NULL;
}
 | |
| 
 | |
/*
 * Stub for builds without CONFIG_CGROUP_MEM_RES_CTLR.
 * Ignores @page and always returns NULL.
 */
static inline struct zone_reclaim_stat *
mem_cgroup_get_reclaim_stat_from_page(struct page *page)
{
	return NULL;
}
 | |
| 
 | |
/* Stub for builds without CONFIG_CGROUP_MEM_RES_CTLR: does nothing. */
static inline void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
					     struct task_struct *p)
{
}
 | |
| 
 | |
/* Stub for builds without CONFIG_CGROUP_MEM_RES_CTLR: does nothing. */
static inline void
mem_cgroup_update_file_mapped(struct page *page, int val)
{
}
 | |
| 
 | |
| static inline
 | |
| unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
 | |
| 					    gfp_t gfp_mask, int nid, int zid)
 | |
| {
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
| #endif /* CONFIG_CGROUP_MEM_RES_CTLR */
 | |
| 
 | |
| #endif /* _LINUX_MEMCONTROL_H */
 | |
| 
 |