ipc: do not use a negative value to re-enable msgmni automatic recomputing
This patch proposes an alternative to the "magical positive-versus-negative number trick" Andrew complained about last week in http://lkml.org/lkml/2008/6/24/418. This had been introduced with the patches that scale msgmni to the amount of lowmem. With these patches, msgmni has a registered notification routine that recomputes msgmni value upon memory add/remove or ipc namespace creation/removal. When msgmni is changed from user space (i.e. value written to the proc file), that notification routine is unregistered, and the way to make it registered back is to write a negative value into the proc file. This is the "magical positive-versus-negative number trick". To fix this, a new proc file is introduced: /proc/sys/kernel/auto_msgmni. This file acts as ON/OFF for msgmni automatic recomputing. With this patch, the process is the following: 1) kernel boots in "automatic recomputing mode" /proc/sys/kernel/msgmni contains the value that has been computed (depends on lowmem) /proc/sys/kernel/auto_msgmni contains "1" 2) echo <val> > /proc/sys/kernel/msgmni . sets msg_ctlmni to <val> . de-activates automatic recomputing (i.e. if, say, some memory is added msgmni won't be recomputed anymore) . /proc/sys/kernel/auto_msgmni now contains "0" 3) echo "0" > /proc/sys/kernel/auto_msgmni . de-activates msgmni automatic recomputing (this has the same effect as 2) except that msg_ctlmni's value stays blocked at its current value) 4) echo "1" > /proc/sys/kernel/auto_msgmni . recomputes msgmni's value based on the current available memory size and number of ipc namespaces . re-activates automatic recomputing for msgmni. Signed-off-by: Nadia Derbey <Nadia.Derbey@bull.net> Cc: Solofo Ramangalahy <Solofo.Ramangalahy@bull.net> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
		
					parent
					
						
							
								f1a43f93f0
							
						
					
				
			
			
				commit
				
					
						9eefe520c8
					
				
			
		
					 3 changed files with 76 additions and 19 deletions
				
			
		|  | @ -36,6 +36,7 @@ struct ipc_namespace { | ||||||
| 	int		msg_ctlmni; | 	int		msg_ctlmni; | ||||||
| 	atomic_t	msg_bytes; | 	atomic_t	msg_bytes; | ||||||
| 	atomic_t	msg_hdrs; | 	atomic_t	msg_hdrs; | ||||||
|  | 	int		auto_msgmni; | ||||||
| 
 | 
 | ||||||
| 	size_t		shm_ctlmax; | 	size_t		shm_ctlmax; | ||||||
| 	size_t		shm_ctlall; | 	size_t		shm_ctlall; | ||||||
|  | @ -53,7 +54,7 @@ extern atomic_t nr_ipc_ns; | ||||||
| 
 | 
 | ||||||
| extern int register_ipcns_notifier(struct ipc_namespace *); | extern int register_ipcns_notifier(struct ipc_namespace *); | ||||||
| extern int cond_register_ipcns_notifier(struct ipc_namespace *); | extern int cond_register_ipcns_notifier(struct ipc_namespace *); | ||||||
| extern int unregister_ipcns_notifier(struct ipc_namespace *); | extern void unregister_ipcns_notifier(struct ipc_namespace *); | ||||||
| extern int ipcns_notify(unsigned long); | extern int ipcns_notify(unsigned long); | ||||||
| 
 | 
 | ||||||
| #else /* CONFIG_SYSVIPC */ | #else /* CONFIG_SYSVIPC */ | ||||||
|  |  | ||||||
|  | @ -27,15 +27,17 @@ static void *get_ipc(ctl_table *table) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Routine that is called when a tunable has successfully been changed by |  * Routine that is called when the file "auto_msgmni" has successfully been | ||||||
|  * hand and it has a callback routine registered on the ipc namespace notifier |  * written. | ||||||
|  * chain: we don't want such tunables to be recomputed anymore upon memory |  * Two values are allowed: | ||||||
|  |  * 0: unregister msgmni's callback routine from the ipc namespace notifier | ||||||
|  |  *    chain. This means that msgmni won't be recomputed anymore upon memory | ||||||
|  *    add/remove or ipc namespace creation/removal. |  *    add/remove or ipc namespace creation/removal. | ||||||
|  * They can come back to a recomputable state by being set to a <0 value. |  * 1: register back the callback routine. | ||||||
|  */ |  */ | ||||||
| static void tunable_set_callback(int val) | static void ipc_auto_callback(int val) | ||||||
| { | { | ||||||
| 	if (val >= 0) | 	if (!val) | ||||||
| 		unregister_ipcns_notifier(current->nsproxy->ipc_ns); | 		unregister_ipcns_notifier(current->nsproxy->ipc_ns); | ||||||
| 	else { | 	else { | ||||||
| 		/*
 | 		/*
 | ||||||
|  | @ -71,7 +73,12 @@ static int proc_ipc_callback_dointvec(ctl_table *table, int write, | ||||||
| 	rc = proc_dointvec(&ipc_table, write, filp, buffer, lenp, ppos); | 	rc = proc_dointvec(&ipc_table, write, filp, buffer, lenp, ppos); | ||||||
| 
 | 
 | ||||||
| 	if (write && !rc && lenp_bef == *lenp) | 	if (write && !rc && lenp_bef == *lenp) | ||||||
| 		tunable_set_callback(*((int *)(ipc_table.data))); | 		/*
 | ||||||
|  | 		 * Tunable has successfully been changed by hand. Disable its | ||||||
|  | 		 * automatic adjustment. This simply requires unregistering | ||||||
|  | 		 * the notifiers that trigger recalculation. | ||||||
|  | 		 */ | ||||||
|  | 		unregister_ipcns_notifier(current->nsproxy->ipc_ns); | ||||||
| 
 | 
 | ||||||
| 	return rc; | 	return rc; | ||||||
| } | } | ||||||
|  | @ -87,10 +94,39 @@ static int proc_ipc_doulongvec_minmax(ctl_table *table, int write, | ||||||
| 					lenp, ppos); | 					lenp, ppos); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write, | ||||||
|  | 	struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) | ||||||
|  | { | ||||||
|  | 	struct ctl_table ipc_table; | ||||||
|  | 	size_t lenp_bef = *lenp; | ||||||
|  | 	int oldval; | ||||||
|  | 	int rc; | ||||||
|  | 
 | ||||||
|  | 	memcpy(&ipc_table, table, sizeof(ipc_table)); | ||||||
|  | 	ipc_table.data = get_ipc(table); | ||||||
|  | 	oldval = *((int *)(ipc_table.data)); | ||||||
|  | 
 | ||||||
|  | 	rc = proc_dointvec_minmax(&ipc_table, write, filp, buffer, lenp, ppos); | ||||||
|  | 
 | ||||||
|  | 	if (write && !rc && lenp_bef == *lenp) { | ||||||
|  | 		int newval = *((int *)(ipc_table.data)); | ||||||
|  | 		/*
 | ||||||
|  | 		 * The file "auto_msgmni" has correctly been set. | ||||||
|  | 		 * React by (un)registering the corresponding tunable, if the | ||||||
|  | 		 * value has changed. | ||||||
|  | 		 */ | ||||||
|  | 		if (newval != oldval) | ||||||
|  | 			ipc_auto_callback(newval); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return rc; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| #else | #else | ||||||
| #define proc_ipc_doulongvec_minmax NULL | #define proc_ipc_doulongvec_minmax NULL | ||||||
| #define proc_ipc_dointvec	   NULL | #define proc_ipc_dointvec	   NULL | ||||||
| #define proc_ipc_callback_dointvec NULL | #define proc_ipc_callback_dointvec NULL | ||||||
|  | #define proc_ipcauto_dointvec_minmax NULL | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
| #ifdef CONFIG_SYSCTL_SYSCALL | #ifdef CONFIG_SYSCTL_SYSCALL | ||||||
|  | @ -142,14 +178,11 @@ static int sysctl_ipc_registered_data(ctl_table *table, int __user *name, | ||||||
| 	rc = sysctl_ipc_data(table, name, nlen, oldval, oldlenp, newval, | 	rc = sysctl_ipc_data(table, name, nlen, oldval, oldlenp, newval, | ||||||
| 		newlen); | 		newlen); | ||||||
| 
 | 
 | ||||||
| 	if (newval && newlen && rc > 0) { | 	if (newval && newlen && rc > 0) | ||||||
| 		/*
 | 		/*
 | ||||||
| 		 * Tunable has successfully been changed from userland | 		 * Tunable has successfully been changed from userland | ||||||
| 		 */ | 		 */ | ||||||
| 		int *data = get_ipc(table); | 		unregister_ipcns_notifier(current->nsproxy->ipc_ns); | ||||||
| 
 |  | ||||||
| 		tunable_set_callback(*data); |  | ||||||
| 	} |  | ||||||
| 
 | 
 | ||||||
| 	return rc; | 	return rc; | ||||||
| } | } | ||||||
|  | @ -158,6 +191,9 @@ static int sysctl_ipc_registered_data(ctl_table *table, int __user *name, | ||||||
| #define sysctl_ipc_registered_data NULL | #define sysctl_ipc_registered_data NULL | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
|  | static int zero; | ||||||
|  | static int one = 1; | ||||||
|  | 
 | ||||||
| static struct ctl_table ipc_kern_table[] = { | static struct ctl_table ipc_kern_table[] = { | ||||||
| 	{ | 	{ | ||||||
| 		.ctl_name	= KERN_SHMMAX, | 		.ctl_name	= KERN_SHMMAX, | ||||||
|  | @ -222,6 +258,16 @@ static struct ctl_table ipc_kern_table[] = { | ||||||
| 		.proc_handler	= proc_ipc_dointvec, | 		.proc_handler	= proc_ipc_dointvec, | ||||||
| 		.strategy	= sysctl_ipc_data, | 		.strategy	= sysctl_ipc_data, | ||||||
| 	}, | 	}, | ||||||
|  | 	{ | ||||||
|  | 		.ctl_name	= CTL_UNNUMBERED, | ||||||
|  | 		.procname	= "auto_msgmni", | ||||||
|  | 		.data		= &init_ipc_ns.auto_msgmni, | ||||||
|  | 		.maxlen		= sizeof(int), | ||||||
|  | 		.mode		= 0644, | ||||||
|  | 		.proc_handler	= proc_ipcauto_dointvec_minmax, | ||||||
|  | 		.extra1		= &zero, | ||||||
|  | 		.extra2		= &one, | ||||||
|  | 	}, | ||||||
| 	{} | 	{} | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -55,25 +55,35 @@ static int ipcns_callback(struct notifier_block *self, | ||||||
| 
 | 
 | ||||||
| int register_ipcns_notifier(struct ipc_namespace *ns) | int register_ipcns_notifier(struct ipc_namespace *ns) | ||||||
| { | { | ||||||
|  | 	int rc; | ||||||
|  | 
 | ||||||
| 	memset(&ns->ipcns_nb, 0, sizeof(ns->ipcns_nb)); | 	memset(&ns->ipcns_nb, 0, sizeof(ns->ipcns_nb)); | ||||||
| 	ns->ipcns_nb.notifier_call = ipcns_callback; | 	ns->ipcns_nb.notifier_call = ipcns_callback; | ||||||
| 	ns->ipcns_nb.priority = IPCNS_CALLBACK_PRI; | 	ns->ipcns_nb.priority = IPCNS_CALLBACK_PRI; | ||||||
| 	return blocking_notifier_chain_register(&ipcns_chain, &ns->ipcns_nb); | 	rc = blocking_notifier_chain_register(&ipcns_chain, &ns->ipcns_nb); | ||||||
|  | 	if (!rc) | ||||||
|  | 		ns->auto_msgmni = 1; | ||||||
|  | 	return rc; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| int cond_register_ipcns_notifier(struct ipc_namespace *ns) | int cond_register_ipcns_notifier(struct ipc_namespace *ns) | ||||||
| { | { | ||||||
|  | 	int rc; | ||||||
|  | 
 | ||||||
| 	memset(&ns->ipcns_nb, 0, sizeof(ns->ipcns_nb)); | 	memset(&ns->ipcns_nb, 0, sizeof(ns->ipcns_nb)); | ||||||
| 	ns->ipcns_nb.notifier_call = ipcns_callback; | 	ns->ipcns_nb.notifier_call = ipcns_callback; | ||||||
| 	ns->ipcns_nb.priority = IPCNS_CALLBACK_PRI; | 	ns->ipcns_nb.priority = IPCNS_CALLBACK_PRI; | ||||||
| 	return blocking_notifier_chain_cond_register(&ipcns_chain, | 	rc = blocking_notifier_chain_cond_register(&ipcns_chain, | ||||||
| 							&ns->ipcns_nb); | 							&ns->ipcns_nb); | ||||||
|  | 	if (!rc) | ||||||
|  | 		ns->auto_msgmni = 1; | ||||||
|  | 	return rc; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| int unregister_ipcns_notifier(struct ipc_namespace *ns) | void unregister_ipcns_notifier(struct ipc_namespace *ns) | ||||||
| { | { | ||||||
| 	return blocking_notifier_chain_unregister(&ipcns_chain, | 	blocking_notifier_chain_unregister(&ipcns_chain, &ns->ipcns_nb); | ||||||
| 						&ns->ipcns_nb); | 	ns->auto_msgmni = 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| int ipcns_notify(unsigned long val) | int ipcns_notify(unsigned long val) | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Nadia Derbey
				Nadia Derbey