Merge branch 'x86-seccomp-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 seccomp changes from Ingo Molnar:
 "This tree includes x86 seccomp filter speedups and related preparatory
  work, which touches core seccomp facilities as well.

  The main idea is to split seccomp into two phases, to be able to enter
  a simple fast path for syscalls with ptrace side effects.

  There's no substantial user-visible (and ABI) effects expected from
  this, except a change in how we emit a better audit record for
  SECCOMP_RET_TRACE events"

* 'x86-seccomp-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86_64, entry: Use split-phase syscall_trace_enter for 64-bit syscalls
  x86_64, entry: Treat regs->ax the same in fastpath and slowpath syscalls
  x86: Split syscall_trace_enter into two phases
  x86, entry: Only call user_exit if TIF_NOHZ
  x86, x32, audit: Fix x32's AUDIT_ARCH wrt audit
  seccomp: Document two-phase seccomp and arch-provided seccomp_data
  seccomp: Allow arch code to provide seccomp_data
  seccomp: Refactor the filter callback and the API
  seccomp,x86,arm,mips,s390: Remove nr parameter from secure_computing
This commit is contained in:
Linus Torvalds 2014-10-14 02:27:06 +02:00
commit ba1a96fc7d
11 changed files with 372 additions and 158 deletions

View file

@ -21,10 +21,11 @@
#include <linux/slab.h>
#include <linux/syscalls.h>
/* #define SECCOMP_DEBUG 1 */
#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
#include <asm/syscall.h>
#endif
#ifdef CONFIG_SECCOMP_FILTER
#include <asm/syscall.h>
#include <linux/filter.h>
#include <linux/pid.h>
#include <linux/ptrace.h>
@ -172,10 +173,10 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
*
* Returns valid seccomp BPF response codes.
*/
static u32 seccomp_run_filters(int syscall)
static u32 seccomp_run_filters(struct seccomp_data *sd)
{
struct seccomp_filter *f = ACCESS_ONCE(current->seccomp.filter);
struct seccomp_data sd;
struct seccomp_data sd_local;
u32 ret = SECCOMP_RET_ALLOW;
/* Ensure unexpected behavior doesn't result in failing open. */
@ -185,14 +186,17 @@ static u32 seccomp_run_filters(int syscall)
/* Make sure cross-thread synced filter points somewhere sane. */
smp_read_barrier_depends();
populate_seccomp_data(&sd);
if (!sd) {
populate_seccomp_data(&sd_local);
sd = &sd_local;
}
/*
* All filters in the list are evaluated and the lowest BPF return
* value always takes priority (ignoring the DATA).
*/
for (; f; f = f->prev) {
u32 cur_ret = BPF_PROG_RUN(f->prog, (void *)&sd);
u32 cur_ret = BPF_PROG_RUN(f->prog, (void *)sd);
if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION))
ret = cur_ret;
@ -563,11 +567,55 @@ static int mode1_syscalls_32[] = {
};
#endif
int __secure_computing(int this_syscall)
static void __secure_computing_strict(int this_syscall)
{
int exit_sig = 0;
int *syscall;
u32 ret;
int *syscall_whitelist = mode1_syscalls;
#ifdef CONFIG_COMPAT
if (is_compat_task())
syscall_whitelist = mode1_syscalls_32;
#endif
do {
if (*syscall_whitelist == this_syscall)
return;
} while (*++syscall_whitelist);
#ifdef SECCOMP_DEBUG
dump_stack();
#endif
audit_seccomp(this_syscall, SIGKILL, SECCOMP_RET_KILL);
do_exit(SIGKILL);
}
#ifndef CONFIG_HAVE_ARCH_SECCOMP_FILTER
void secure_computing_strict(int this_syscall)
{
int mode = current->seccomp.mode;
if (mode == 0)
return;
else if (mode == SECCOMP_MODE_STRICT)
__secure_computing_strict(this_syscall);
else
BUG();
}
#else
int __secure_computing(void)
{
u32 phase1_result = seccomp_phase1(NULL);
if (likely(phase1_result == SECCOMP_PHASE1_OK))
return 0;
else if (likely(phase1_result == SECCOMP_PHASE1_SKIP))
return -1;
else
return seccomp_phase2(phase1_result);
}
#ifdef CONFIG_SECCOMP_FILTER
static u32 __seccomp_phase1_filter(int this_syscall, struct seccomp_data *sd)
{
u32 filter_ret, action;
int data;
/*
* Make sure that any changes to mode from another thread have
@ -575,86 +623,128 @@ int __secure_computing(int this_syscall)
*/
rmb();
switch (current->seccomp.mode) {
case SECCOMP_MODE_STRICT:
syscall = mode1_syscalls;
#ifdef CONFIG_COMPAT
if (is_compat_task())
syscall = mode1_syscalls_32;
#endif
do {
if (*syscall == this_syscall)
return 0;
} while (*++syscall);
exit_sig = SIGKILL;
ret = SECCOMP_RET_KILL;
break;
#ifdef CONFIG_SECCOMP_FILTER
case SECCOMP_MODE_FILTER: {
int data;
struct pt_regs *regs = task_pt_regs(current);
ret = seccomp_run_filters(this_syscall);
data = ret & SECCOMP_RET_DATA;
ret &= SECCOMP_RET_ACTION;
switch (ret) {
case SECCOMP_RET_ERRNO:
/* Set the low-order 16-bits as a errno. */
syscall_set_return_value(current, regs,
-data, 0);
goto skip;
case SECCOMP_RET_TRAP:
/* Show the handler the original registers. */
syscall_rollback(current, regs);
/* Let the filter pass back 16 bits of data. */
seccomp_send_sigsys(this_syscall, data);
goto skip;
case SECCOMP_RET_TRACE:
/* Skip these calls if there is no tracer. */
if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) {
syscall_set_return_value(current, regs,
-ENOSYS, 0);
goto skip;
}
/* Allow the BPF to provide the event message */
ptrace_event(PTRACE_EVENT_SECCOMP, data);
/*
* The delivery of a fatal signal during event
* notification may silently skip tracer notification.
* Terminating the task now avoids executing a system
* call that may not be intended.
*/
if (fatal_signal_pending(current))
break;
if (syscall_get_nr(current, regs) < 0)
goto skip; /* Explicit request to skip. */
filter_ret = seccomp_run_filters(sd);
data = filter_ret & SECCOMP_RET_DATA;
action = filter_ret & SECCOMP_RET_ACTION;
return 0;
case SECCOMP_RET_ALLOW:
return 0;
case SECCOMP_RET_KILL:
default:
break;
}
exit_sig = SIGSYS;
break;
switch (action) {
case SECCOMP_RET_ERRNO:
/* Set the low-order 16-bits as a errno. */
syscall_set_return_value(current, task_pt_regs(current),
-data, 0);
goto skip;
case SECCOMP_RET_TRAP:
/* Show the handler the original registers. */
syscall_rollback(current, task_pt_regs(current));
/* Let the filter pass back 16 bits of data. */
seccomp_send_sigsys(this_syscall, data);
goto skip;
case SECCOMP_RET_TRACE:
return filter_ret; /* Save the rest for phase 2. */
case SECCOMP_RET_ALLOW:
return SECCOMP_PHASE1_OK;
case SECCOMP_RET_KILL:
default:
audit_seccomp(this_syscall, SIGSYS, action);
do_exit(SIGSYS);
}
unreachable();
skip:
audit_seccomp(this_syscall, 0, action);
return SECCOMP_PHASE1_SKIP;
}
#endif
/**
* seccomp_phase1() - run fast path seccomp checks on the current syscall
* @arg sd: The seccomp_data or NULL
*
* This only reads pt_regs via the syscall_xyz helpers. The only change
* it will make to pt_regs is via syscall_set_return_value, and it will
* only do that if it returns SECCOMP_PHASE1_SKIP.
*
* If sd is provided, it will not read pt_regs at all.
*
* It may also call do_exit or force a signal; these actions must be
* safe.
*
* If it returns SECCOMP_PHASE1_OK, the syscall passes checks and should
* be processed normally.
*
* If it returns SECCOMP_PHASE1_SKIP, then the syscall should not be
* invoked. In this case, seccomp_phase1 will have set the return value
* using syscall_set_return_value.
*
* If it returns anything else, then the return value should be passed
* to seccomp_phase2 from a context in which ptrace hooks are safe.
*/
u32 seccomp_phase1(struct seccomp_data *sd)
{
int mode = current->seccomp.mode;
int this_syscall = sd ? sd->nr :
syscall_get_nr(current, task_pt_regs(current));
switch (mode) {
case SECCOMP_MODE_STRICT:
__secure_computing_strict(this_syscall); /* may call do_exit */
return SECCOMP_PHASE1_OK;
#ifdef CONFIG_SECCOMP_FILTER
case SECCOMP_MODE_FILTER:
return __seccomp_phase1_filter(this_syscall, sd);
#endif
default:
BUG();
}
#ifdef SECCOMP_DEBUG
dump_stack();
#endif
audit_seccomp(this_syscall, exit_sig, ret);
do_exit(exit_sig);
#ifdef CONFIG_SECCOMP_FILTER
skip:
audit_seccomp(this_syscall, exit_sig, ret);
#endif
return -1;
}
/**
* seccomp_phase2() - finish slow path seccomp work for the current syscall
* @phase1_result: The return value from seccomp_phase1()
*
* This must be called from a context in which ptrace hooks can be used.
*
* Returns 0 if the syscall should be processed or -1 to skip the syscall.
*/
int seccomp_phase2(u32 phase1_result)
{
struct pt_regs *regs = task_pt_regs(current);
u32 action = phase1_result & SECCOMP_RET_ACTION;
int data = phase1_result & SECCOMP_RET_DATA;
BUG_ON(action != SECCOMP_RET_TRACE);
audit_seccomp(syscall_get_nr(current, regs), 0, action);
/* Skip these calls if there is no tracer. */
if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) {
syscall_set_return_value(current, regs,
-ENOSYS, 0);
return -1;
}
/* Allow the BPF to provide the event message */
ptrace_event(PTRACE_EVENT_SECCOMP, data);
/*
* The delivery of a fatal signal during event
* notification may silently skip tracer notification.
* Terminating the task now avoids executing a system
* call that may not be intended.
*/
if (fatal_signal_pending(current))
do_exit(SIGSYS);
if (syscall_get_nr(current, regs) < 0)
return -1; /* Explicit request to skip. */
return 0;
}
#endif /* CONFIG_HAVE_ARCH_SECCOMP_FILTER */
long prctl_get_seccomp(void)
{
return current->seccomp.mode;