kthread: make kthread_create() killable

Any user process that calls wait_for_completion(), except the global init
process, might be chosen by the OOM killer while it is waiting for a
complete() call from some other process that is allocating memory.  See
CVE-2012-4398 "kernel: request_module() OOM local DoS" for how this can happen.

If such a user is chosen by the OOM killer while it is waiting for the
completion in TASK_UNINTERRUPTIBLE state, the system will remain stressed
by memory starvation, because the OOM killer cannot kill that user.

kthread_create() is one such user.  This patch fixes the problem for
kthreadd by making kthread_create() killable - the same approach that was
used to fix CVE-2012-4398.

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Oleg Nesterov <oleg@redhat.com>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Tetsuo Handa 2013-11-12 15:06:45 -08:00 committed by Linus Torvalds
commit 786235eeba

View file

@ -33,7 +33,7 @@ struct kthread_create_info
/* Result passed back to kthread_create() from kthreadd. */ /* Result passed back to kthread_create() from kthreadd. */
struct task_struct *result; struct task_struct *result;
struct completion done; struct completion *done;
struct list_head list; struct list_head list;
}; };
@ -178,6 +178,7 @@ static int kthread(void *_create)
struct kthread_create_info *create = _create; struct kthread_create_info *create = _create;
int (*threadfn)(void *data) = create->threadfn; int (*threadfn)(void *data) = create->threadfn;
void *data = create->data; void *data = create->data;
struct completion *done;
struct kthread self; struct kthread self;
int ret; int ret;
@ -187,10 +188,16 @@ static int kthread(void *_create)
init_completion(&self.parked); init_completion(&self.parked);
current->vfork_done = &self.exited; current->vfork_done = &self.exited;
/* If user was SIGKILLed, I release the structure. */
done = xchg(&create->done, NULL);
if (!done) {
kfree(create);
do_exit(-EINTR);
}
/* OK, tell user we're spawned, wait for stop or wakeup */ /* OK, tell user we're spawned, wait for stop or wakeup */
__set_current_state(TASK_UNINTERRUPTIBLE); __set_current_state(TASK_UNINTERRUPTIBLE);
create->result = current; create->result = current;
complete(&create->done); complete(done);
schedule(); schedule();
ret = -EINTR; ret = -EINTR;
@ -223,8 +230,15 @@ static void create_kthread(struct kthread_create_info *create)
/* We want our own signal handler (we take no signals by default). */ /* We want our own signal handler (we take no signals by default). */
pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD); pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD);
if (pid < 0) { if (pid < 0) {
/* If user was SIGKILLed, I release the structure. */
struct completion *done = xchg(&create->done, NULL);
if (!done) {
kfree(create);
return;
}
create->result = ERR_PTR(pid); create->result = ERR_PTR(pid);
complete(&create->done); complete(done);
} }
} }
@ -255,36 +269,59 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
const char namefmt[], const char namefmt[],
...) ...)
{ {
struct kthread_create_info create; DECLARE_COMPLETION_ONSTACK(done);
struct task_struct *task;
struct kthread_create_info *create = kmalloc(sizeof(*create),
GFP_KERNEL);
create.threadfn = threadfn; if (!create)
create.data = data; return ERR_PTR(-ENOMEM);
create.node = node; create->threadfn = threadfn;
init_completion(&create.done); create->data = data;
create->node = node;
create->done = &done;
spin_lock(&kthread_create_lock); spin_lock(&kthread_create_lock);
list_add_tail(&create.list, &kthread_create_list); list_add_tail(&create->list, &kthread_create_list);
spin_unlock(&kthread_create_lock); spin_unlock(&kthread_create_lock);
wake_up_process(kthreadd_task); wake_up_process(kthreadd_task);
wait_for_completion(&create.done); /*
* Wait for completion in killable state, for I might be chosen by
if (!IS_ERR(create.result)) { * the OOM killer while kthreadd is trying to allocate memory for
* new kernel thread.
*/
if (unlikely(wait_for_completion_killable(&done))) {
/*
* If I was SIGKILLed before kthreadd (or new kernel thread)
* calls complete(), leave the cleanup of this structure to
* that thread.
*/
if (xchg(&create->done, NULL))
return ERR_PTR(-ENOMEM);
/*
* kthreadd (or new kernel thread) will call complete()
* shortly.
*/
wait_for_completion(&done);
}
task = create->result;
if (!IS_ERR(task)) {
static const struct sched_param param = { .sched_priority = 0 }; static const struct sched_param param = { .sched_priority = 0 };
va_list args; va_list args;
va_start(args, namefmt); va_start(args, namefmt);
vsnprintf(create.result->comm, sizeof(create.result->comm), vsnprintf(task->comm, sizeof(task->comm), namefmt, args);
namefmt, args);
va_end(args); va_end(args);
/* /*
* root may have changed our (kthreadd's) priority or CPU mask. * root may have changed our (kthreadd's) priority or CPU mask.
* The kernel thread should not inherit these properties. * The kernel thread should not inherit these properties.
*/ */
sched_setscheduler_nocheck(create.result, SCHED_NORMAL, &param); sched_setscheduler_nocheck(task, SCHED_NORMAL, &param);
set_cpus_allowed_ptr(create.result, cpu_all_mask); set_cpus_allowed_ptr(task, cpu_all_mask);
} }
return create.result; kfree(create);
return task;
} }
EXPORT_SYMBOL(kthread_create_on_node); EXPORT_SYMBOL(kthread_create_on_node);