 f431b634f2
			
		
	
	
	f431b634f2
	
	
	
		
			
			The tracing of ia32 compat system calls has been a bit of a pain as they
use different system call numbers than the 64bit equivalents.
I wrote a simple 'lls' program that lists files. I compiled it as an i686
ELF binary and ran it on an x86_64 box. This is the result:
echo 0 > /debug/tracing/tracing_on
echo 1 > /debug/tracing/events/syscalls/enable
echo 1 > /debug/tracing/tracing_on ; ./lls ; echo 0 > /debug/tracing/tracing_on
grep lls /debug/tracing/trace
[.. skipping calls before TS_COMPAT is set ...]
             lls-1127  [005] d...   936.409188: sys_recvfrom(fd: 0, ubuf: 4d560fc4, size: 0, flags: 8048034, addr: 8, addr_len: f7700420)
             lls-1127  [005] d...   936.409190: sys_recvfrom -> 0x8a77000
             lls-1127  [005] d...   936.409211: sys_lgetxattr(pathname: 0, name: 1000, value: 3, size: 22)
             lls-1127  [005] d...   936.409215: sys_lgetxattr -> 0xf76ff000
             lls-1127  [005] d...   936.409223: sys_dup2(oldfd: 4d55ae9b, newfd: 4)
             lls-1127  [005] d...   936.409228: sys_dup2 -> 0xfffffffffffffffe
             lls-1127  [005] d...   936.409236: sys_newfstat(fd: 4d55b085, statbuf: 80000)
             lls-1127  [005] d...   936.409242: sys_newfstat -> 0x3
             lls-1127  [005] d...   936.409243: sys_removexattr(pathname: 3, name: ffcd0060)
             lls-1127  [005] d...   936.409244: sys_removexattr -> 0x0
             lls-1127  [005] d...   936.409245: sys_lgetxattr(pathname: 0, name: 19614, value: 1, size: 2)
             lls-1127  [005] d...   936.409248: sys_lgetxattr -> 0xf76e5000
             lls-1127  [005] d...   936.409248: sys_newlstat(filename: 3, statbuf: 19614)
             lls-1127  [005] d...   936.409249: sys_newlstat -> 0x0
             lls-1127  [005] d...   936.409262: sys_newfstat(fd: f76fb588, statbuf: 80000)
             lls-1127  [005] d...   936.409279: sys_newfstat -> 0x3
             lls-1127  [005] d...   936.409279: sys_close(fd: 3)
             lls-1127  [005] d...   936.421550: sys_close -> 0x200
             lls-1127  [005] d...   936.421558: sys_removexattr(pathname: 3, name: ffcd00d0)
             lls-1127  [005] d...   936.421560: sys_removexattr -> 0x0
             lls-1127  [005] d...   936.421569: sys_lgetxattr(pathname: 4d564000, name: 1b1abc, value: 5, size: 802)
             lls-1127  [005] d...   936.421574: sys_lgetxattr -> 0x4d564000
             lls-1127  [005] d...   936.421575: sys_capget(header: 4d70f000, dataptr: 1000)
             lls-1127  [005] d...   936.421580: sys_capget -> 0x0
             lls-1127  [005] d...   936.421580: sys_lgetxattr(pathname: 4d710000, name: 3000, value: 3, size: 812)
             lls-1127  [005] d...   936.421589: sys_lgetxattr -> 0x4d710000
             lls-1127  [005] d...   936.426130: sys_lgetxattr(pathname: 4d713000, name: 2abc, value: 3, size: 32)
             lls-1127  [005] d...   936.426141: sys_lgetxattr -> 0x4d713000
             lls-1127  [005] d...   936.426145: sys_newlstat(filename: 3, statbuf: f76ff3f0)
             lls-1127  [005] d...   936.426146: sys_newlstat -> 0x0
             lls-1127  [005] d...   936.431748: sys_lgetxattr(pathname: 0, name: 1000, value: 3, size: 22)
Obviously I'm not calling newfstat with a fd of 4d55b085. The calls are
obviously incorrect, and confusing.
Other efforts have been made to fix this:
https://lkml.org/lkml/2012/3/26/367
But the real solution is to rewrite the syscall internals and come up
with a proper solution, one that doesn't require all the kludges that the
current solution has.
Thus for now, instead of outputting incorrect data, simply ignore them.
With this patch the changes now have:
 #> grep lls /debug/tracing/trace
 #>
Compat system calls simply are not traced. If users need compat
syscalls, then they should just use the raw syscall tracepoints.
For an architecture to have its compat syscalls ignored, it must
define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS (done in asm/ftrace.h) and also
define an arch_trace_is_compat_syscall() function that will return true
if the current task should ignore tracing the syscall.
I want to stress that this change does not affect actual syscalls in any
way, shape or form. It is only used within the tracing system and
doesn't interfere with the syscall logic at all. The changes are
consolidated nicely into trace_syscalls.c and asm/ftrace.h.
I had to make one small modification to asm/thread_info.h and that was
to remove the include of asm/ftrace.h. As asm/ftrace.h required the
current_thread_info() it was causing include hell. That include was
added back in 2008 when the function graph tracer was added:
 commit caf4b323 "tracing, x86: add low level support for ftrace return tracing"
It does not need to be included there.
Link: http://lkml.kernel.org/r/1360703939.21867.99.camel@gandalf.local.home
Acked-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
		
	
			
		
			
				
	
	
		
			289 lines
		
	
	
	
		
			8.9 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			289 lines
		
	
	
	
		
			8.9 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
| /* thread_info.h: low-level thread information
 | |
|  *
 | |
|  * Copyright (C) 2002  David Howells (dhowells@redhat.com)
 | |
|  * - Incorporating suggestions made by Linus Torvalds and Dave Miller
 | |
|  */
 | |
| 
 | |
| #ifndef _ASM_X86_THREAD_INFO_H
 | |
| #define _ASM_X86_THREAD_INFO_H
 | |
| 
 | |
| #include <linux/compiler.h>
 | |
| #include <asm/page.h>
 | |
| #include <asm/types.h>
 | |
| 
 | |
| /*
 | |
|  * low level task data that entry.S needs immediate access to
 | |
|  * - this struct should fit entirely inside of one cache line
 | |
|  * - this struct shares the supervisor stack pages
 | |
|  */
 | |
| #ifndef __ASSEMBLY__
 | |
| struct task_struct;
 | |
| struct exec_domain;
 | |
| #include <asm/processor.h>
 | |
| #include <linux/atomic.h>
 | |
| 
 | |
| struct thread_info {
 | |
| 	struct task_struct	*task;		/* main task structure */
 | |
| 	struct exec_domain	*exec_domain;	/* execution domain */
 | |
| 	__u32			flags;		/* low level flags (TIF_* bit numbers) */
 | |
| 	__u32			status;		/* thread synchronous flags (TS_* values) */
 | |
| 	__u32			cpu;		/* current CPU */
 | |
| 	int			preempt_count;	/* 0 => preemptable,
 | |
| 						   <0 => BUG */
 | |
| 	mm_segment_t		addr_limit;	/* INIT_THREAD_INFO sets this to KERNEL_DS */
 | |
| 	struct restart_block    restart_block;	/* INIT_THREAD_INFO sets .fn to do_no_restart_syscall */
 | |
| 	void __user		*sysenter_return;	/* NOTE(review): presumably the user-space return address for SYSENTER -- confirm */
 | |
| #ifdef CONFIG_X86_32
 | |
| 	unsigned long           previous_esp;   /* ESP of the previous stack in
 | |
| 						   case of nested (IRQ) stacks
 | |
| 						*/
 | |
| 	__u8			supervisor_stack[0];	/* zero-length array, 32-bit builds only */
 | |
| #endif
 | |
| 	unsigned int		sig_on_uaccess_error:1;	/* NOTE(review): presumably requests a signal when a uaccess fails -- confirm */
 | |
| 	unsigned int		uaccess_err:1;	/* uaccess failed */
 | |
| };
 | |
| 
 | |
| #define INIT_THREAD_INFO(tsk)			\
 | |
| {						\
 | |
| 	.task		= &tsk,			\
 | |
| 	.exec_domain	= &default_exec_domain,	\
 | |
| 	.flags		= 0,			\
 | |
| 	.cpu		= 0,			\
 | |
| 	.preempt_count	= INIT_PREEMPT_COUNT,	\
 | |
| 	.addr_limit	= KERNEL_DS,		\
 | |
| 	.restart_block = {			\
 | |
| 		.fn = do_no_restart_syscall,	\
 | |
| 	},					\
 | |
| }
 | |
| 
 | |
| #define init_thread_info	(init_thread_union.thread_info)
 | |
| #define init_stack		(init_thread_union.stack)
 | |
| 
 | |
| #else /* !__ASSEMBLY__ */
 | |
| 
 | |
| #include <asm/asm-offsets.h>
 | |
| 
 | |
| #endif
 | |
| 
 | |
| /*
 | |
|  * thread information flags
 | |
|  * - these are process state flags that various assembly files
 | |
|  *   may need to access
 | |
|  * - pending work-to-be-done flags are in LSW
 | |
|  * - other flags in MSW
 | |
|  * Warning: layout of LSW is hardcoded in entry.S
 | |
|  */
 | |
| #define TIF_SYSCALL_TRACE	0	/* syscall trace active */
 | |
| #define TIF_NOTIFY_RESUME	1	/* callback before returning to user */
 | |
| #define TIF_SIGPENDING		2	/* signal pending */
 | |
| #define TIF_NEED_RESCHED	3	/* rescheduling necessary */
 | |
| #define TIF_SINGLESTEP		4	/* re-enable singlestep on user return */
 | |
| #define TIF_SYSCALL_EMU		6	/* syscall emulation active */
 | |
| #define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
 | |
| #define TIF_SECCOMP		8	/* secure computing */
 | |
| #define TIF_MCE_NOTIFY		10	/* notify userspace of an MCE */
 | |
| #define TIF_USER_RETURN_NOTIFY	11	/* notify kernel of userspace return */
 | |
| #define TIF_UPROBE		12	/* breakpointed or singlestepping */
 | |
| #define TIF_NOTSC		16	/* TSC is not accessible in userland */
 | |
| #define TIF_IA32		17	/* IA32 compatibility process */
 | |
| #define TIF_FORK		18	/* ret_from_fork */
 | |
| #define TIF_NOHZ		19	/* in adaptive nohz mode */
 | |
| #define TIF_MEMDIE		20	/* is terminating due to OOM killer */
 | |
| #define TIF_DEBUG		21	/* uses debug registers */
 | |
| #define TIF_IO_BITMAP		22	/* uses I/O bitmap */
 | |
| #define TIF_FORCED_TF		24	/* true if TF in eflags artificially */
 | |
| #define TIF_BLOCKSTEP		25	/* set when we want DEBUGCTLMSR_BTF */
 | |
| #define TIF_LAZY_MMU_UPDATES	27	/* task is updating the mmu lazily */
 | |
| #define TIF_SYSCALL_TRACEPOINT	28	/* syscall tracepoint instrumentation */
 | |
| #define TIF_ADDR32		29	/* 32-bit address space on 64 bits */
 | |
| #define TIF_X32			30	/* 32-bit native x86-64 binary */
 | |
| 
 | |
| #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 | |
| #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
 | |
| #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 | |
| #define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
 | |
| #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
 | |
| #define _TIF_SYSCALL_EMU	(1 << TIF_SYSCALL_EMU)
 | |
| #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
 | |
| #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
 | |
| #define _TIF_MCE_NOTIFY		(1 << TIF_MCE_NOTIFY)
 | |
| #define _TIF_USER_RETURN_NOTIFY	(1 << TIF_USER_RETURN_NOTIFY)
 | |
| #define _TIF_UPROBE		(1 << TIF_UPROBE)
 | |
| #define _TIF_NOTSC		(1 << TIF_NOTSC)
 | |
| #define _TIF_IA32		(1 << TIF_IA32)
 | |
| #define _TIF_FORK		(1 << TIF_FORK)
 | |
| #define _TIF_NOHZ		(1 << TIF_NOHZ)
 | |
| #define _TIF_DEBUG		(1 << TIF_DEBUG)
 | |
| #define _TIF_IO_BITMAP		(1 << TIF_IO_BITMAP)
 | |
| #define _TIF_FORCED_TF		(1 << TIF_FORCED_TF)
 | |
| #define _TIF_BLOCKSTEP		(1 << TIF_BLOCKSTEP)
 | |
| #define _TIF_LAZY_MMU_UPDATES	(1 << TIF_LAZY_MMU_UPDATES)
 | |
| #define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
 | |
| #define _TIF_ADDR32		(1 << TIF_ADDR32)
 | |
| #define _TIF_X32		(1 << TIF_X32)
 | |
| 
 | |
| /* work to do in syscall_trace_enter() */
 | |
| #define _TIF_WORK_SYSCALL_ENTRY	\
 | |
| 	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT |	\
 | |
| 	 _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT |	\
 | |
| 	 _TIF_NOHZ)
 | |
| 
 | |
| /* work to do in syscall_trace_leave() */
 | |
| #define _TIF_WORK_SYSCALL_EXIT	\
 | |
| 	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP |	\
 | |
| 	 _TIF_SYSCALL_TRACEPOINT | _TIF_NOHZ)
 | |
| 
 | |
| /* work to do on interrupt/exception return: the low 16 flag bits, minus the five masks excluded below */
 | |
| #define _TIF_WORK_MASK							\
 | |
| 	(0x0000FFFF &							\
 | |
| 	 ~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|			\
 | |
| 	   _TIF_SINGLESTEP|_TIF_SECCOMP|_TIF_SYSCALL_EMU))
 | |
| 
 | |
| /* work to do on any return to user space */
 | |
| #define _TIF_ALLWORK_MASK						\
 | |
| 	((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT |	\
 | |
| 	_TIF_NOHZ)
 | |
| 
 | |
| /* Only used for 64 bit */
 | |
| #define _TIF_DO_NOTIFY_MASK						\
 | |
| 	(_TIF_SIGPENDING | _TIF_MCE_NOTIFY | _TIF_NOTIFY_RESUME |	\
 | |
| 	 _TIF_USER_RETURN_NOTIFY)
 | |
| 
 | |
| /* flags to check in __switch_to() */
 | |
| #define _TIF_WORK_CTXSW							\
 | |
| 	(_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP)
 | |
| 
 | |
| #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
 | |
| #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG)
 | |
| 
 | |
| #define PREEMPT_ACTIVE		0x10000000
 | |
| 
 | |
| #ifdef CONFIG_X86_32
 | |
| 
 | |
| #define STACK_WARN	(THREAD_SIZE/8)
 | |
| /*
 | |
|  * macros/functions for gaining access to the thread information structure
 | |
|  *
 | |
|  * preempt_count needs to be 1 initially, until the scheduler is functional.
 | |
|  */
 | |
| #ifndef __ASSEMBLY__
 | |
| 
 | |
| 
 | |
| /* how to get the current stack pointer from C */
 | |
| register unsigned long current_stack_pointer asm("esp") __used;
 | |
| 
 | |
| /* how to get the thread information struct from C */
 | |
| static inline struct thread_info *current_thread_info(void)
 | |
| {
 | |
| 	return (struct thread_info *)
 | |
| 		(current_stack_pointer & ~(THREAD_SIZE - 1));
 | |
| }
 | |
| 
 | |
| #else /* !__ASSEMBLY__ */
 | |
| 
 | |
| /* how to get the thread information struct from ASM */
 | |
| #define GET_THREAD_INFO(reg)	 \
 | |
| 	movl $-THREAD_SIZE, reg; \
 | |
| 	andl %esp, reg
 | |
| 
 | |
| /* use this one if reg already contains %esp */
 | |
| #define GET_THREAD_INFO_WITH_ESP(reg) \
 | |
| 	andl $-THREAD_SIZE, reg
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #else /* X86_32 */
 | |
| 
 | |
| #include <asm/percpu.h>
 | |
| #define KERNEL_STACK_OFFSET (5*8)
 | |
| 
 | |
| /*
 | |
|  * macros/functions for gaining access to the thread information structure
 | |
|  * preempt_count needs to be 1 initially, until the scheduler is functional.
 | |
|  */
 | |
| #ifndef __ASSEMBLY__
 | |
| DECLARE_PER_CPU(unsigned long, kernel_stack);
 | |
| 
 | |
| static inline struct thread_info *current_thread_info(void)
 | |
| {
 | |
| 	struct thread_info *ti;
 | |
| 	ti = (void *)(this_cpu_read_stable(kernel_stack) +
 | |
| 		      KERNEL_STACK_OFFSET - THREAD_SIZE);
 | |
| 	return ti;
 | |
| }
 | |
| 
 | |
| #else /* !__ASSEMBLY__ */
 | |
| 
 | |
| /* how to get the thread information struct from ASM */
 | |
| #define GET_THREAD_INFO(reg) \
 | |
| 	movq PER_CPU_VAR(kernel_stack),reg ; \
 | |
| 	subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg
 | |
| 
 | |
| /*
 | |
|  * Same if PER_CPU_VAR(kernel_stack) is, perhaps with some offset, already in
 | |
|  * a certain register (to be used in assembler memory operands).
 | |
|  */
 | |
| #define THREAD_INFO(reg, off) KERNEL_STACK_OFFSET+(off)-THREAD_SIZE(reg)
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #endif /* !X86_32 */
 | |
| 
 | |
| /*
 | |
|  * Thread-synchronous status.
 | |
|  *
 | |
|  * This is different from the flags in that nobody else
 | |
|  * ever touches our thread-synchronous status, so we don't
 | |
|  * have to worry about atomic accesses.
 | |
|  */
 | |
| #define TS_COMPAT		0x0002	/* 32bit syscall active (64BIT)*/
 | |
| #define TS_POLLING		0x0004	/* idle task polling need_resched,
 | |
| 					   skip sending interrupt */
 | |
| #define TS_RESTORE_SIGMASK	0x0008	/* restore signal mask in do_signal() */
 | |
| 
 | |
| #define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING)
 | |
| 
 | |
| #ifndef __ASSEMBLY__
 | |
| #define HAVE_SET_RESTORE_SIGMASK	1
 | |
| static inline void set_restore_sigmask(void)
 | |
| {
 | |
| 	struct thread_info *ti = current_thread_info();
 | |
| 	ti->status |= TS_RESTORE_SIGMASK;
 | |
| 	WARN_ON(!test_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags));
 | |
| }
 | |
| static inline void clear_restore_sigmask(void)
 | |
| {
 | |
| 	current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
 | |
| }
 | |
| static inline bool test_restore_sigmask(void)
 | |
| {
 | |
| 	return current_thread_info()->status & TS_RESTORE_SIGMASK;
 | |
| }
 | |
| static inline bool test_and_clear_restore_sigmask(void)
 | |
| {
 | |
| 	struct thread_info *ti = current_thread_info();
 | |
| 	if (!(ti->status & TS_RESTORE_SIGMASK))
 | |
| 		return false;
 | |
| 	ti->status &= ~TS_RESTORE_SIGMASK;
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
/*
 * Report whether the current task counts as an ia32 task:
 *  - always true when built with CONFIG_X86_32;
 *  - otherwise, with CONFIG_IA32_EMULATION, true exactly when TS_COMPAT
 *    is set in the current thread's status;
 *  - false in every other configuration.
 */
static inline bool is_ia32_task(void)
{
#if defined(CONFIG_X86_32)
	return true;
#elif defined(CONFIG_IA32_EMULATION)
	return (current_thread_info()->status & TS_COMPAT) != 0;
#else
	return false;
#endif
}
 | |
| #endif	/* !__ASSEMBLY__ */
 | |
| 
 | |
| #ifndef __ASSEMBLY__
 | |
| extern void arch_task_cache_init(void);
 | |
| extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
 | |
| extern void arch_release_task_struct(struct task_struct *tsk);
 | |
| #endif
 | |
| #endif /* _ASM_X86_THREAD_INFO_H */
 |