Merge branch 'perf-uprobes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull user-space probe instrumentation from Ingo Molnar:
 "The uprobes code originates from SystemTap and has been used for years
  in Fedora and RHEL kernels.  This version is much rewritten, reviews
  from PeterZ, Oleg and myself shaped the end result.

  This tree includes uprobes support in 'perf probe' - but SystemTap
  (and other tools) can take advantage of user probe points as well.

  Sample usage of uprobes via perf, for example to profile malloc()
  calls without modifying user-space binaries.

  First boot a new kernel with CONFIG_UPROBE_EVENT=y enabled.

  If you don't know which function you want to probe you can pick one
  from 'perf top' or can get a list all functions that can be probed
  within libc (binaries can be specified as well):

	$ perf probe -F -x /lib/libc.so.6

  To probe libc's malloc():

	$ perf probe -x /lib64/libc.so.6 malloc
	Added new event:
	probe_libc:malloc    (on 0x7eac0)

  You can now use it in all perf tools, such as:

	perf record -e probe_libc:malloc -aR sleep 1

  Make use of it to create a call graph (as the flat profile is going to
  look very boring):

	$ perf record -e probe_libc:malloc -gR make
	[ perf record: Woken up 173 times to write data ]
	[ perf record: Captured and wrote 44.190 MB perf.data (~1930712

	$ perf report | less

	  32.03%            git  libc-2.15.so   [.] malloc
	                    |
	                    --- malloc

	  29.49%            cc1  libc-2.15.so   [.] malloc
	                    |
	                    --- malloc
	                       |
	                       |--0.95%-- 0x208eb1000000000
	                       |
	                       |--0.63%-- htab_traverse_noresize

	  11.04%             as  libc-2.15.so   [.] malloc
	                     |
	                     --- malloc
	                        |

	   7.15%             ld  libc-2.15.so   [.] malloc
	                     |
	                     --- malloc
	                        |

	   5.07%             sh  libc-2.15.so   [.] malloc
	                     |
	                     --- malloc
	                        |
	   4.99%  python-config  libc-2.15.so   [.] malloc
	          |
	          --- malloc
	             |
	   4.54%           make  libc-2.15.so   [.] malloc
	                   |
	                   --- malloc
	                      |
	                      |--7.34%-- glob
	                      |          |
	                      |          |--93.18%-- 0x41588f
	                      |          |
	                      |           --6.82%-- glob
	                      |                     0x41588f

	   ...

  Or:

	$ perf report -g flat | less

	# Overhead        Command  Shared Object      Symbol
	# ........  .............  .............  ..........
	#
	  32.03%            git  libc-2.15.so   [.] malloc
	          27.19%
	              malloc

	  29.49%            cc1  libc-2.15.so   [.] malloc
	          24.77%
	              malloc

	  11.04%             as  libc-2.15.so   [.] malloc
	          11.02%
	              malloc

	   7.15%             ld  libc-2.15.so   [.] malloc
	           6.57%
	              malloc

	 ...

  The core uprobes design is fairly straightforward: uprobes probe
  points register themselves at (inode:offset) addresses of
  libraries/binaries, after which all existing (or new) vmas that map
  that address will have a software breakpoint injected at that address.
  vmas are COW-ed to preserve original content.  The probe points are
  kept in an rbtree.

  If user-space executes the probed inode:offset instruction address
  then an event is generated which can be recovered from the regular
  perf event channels and mmap-ed ring-buffer.

  Multiple probes at the same address are supported, they create a
  dynamic callback list of event consumers.

  The basic model is further complicated by the XOL speedup: the
  original instruction that is probed is copied (in an architecture
  specific fashion) and executed out of line when the probe triggers.
  The XOL area is a single vma per process, with a fixed number of
  entries (which limits probe execution parallelism).

  The API: uprobes are installed/removed via
  /sys/kernel/debug/tracing/uprobe_events, the API is integrated to
  align with the kprobes interface as much as possible, but is separate
  to it.

  Injecting a probe point is privileged operation, which can be relaxed
  by setting perf_paranoid to -1.

  You can use multiple probes as well and mix them with kprobes and
  regular PMU events or tracepoints, when instrumenting a task."

Fix up trivial conflicts in mm/memory.c due to previous cleanup of
unmap_single_vma().

* 'perf-uprobes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (21 commits)
  perf probe: Detect probe target when m/x options are absent
  perf probe: Provide perf interface for uprobes
  tracing: Fix kconfig warning due to a typo
  tracing: Provide trace events interface for uprobes
  tracing: Extract out common code for kprobes/uprobes trace events
  tracing: Modify is_delete, is_return from int to bool
  uprobes/core: Decrement uprobe count before the pages are unmapped
  uprobes/core: Make background page replacement logic account for rss_stat counters
  uprobes/core: Optimize probe hits with the help of a counter
  uprobes/core: Allocate XOL slots for uprobes use
  uprobes/core: Handle breakpoint and singlestep exceptions
  uprobes/core: Rename bkpt to swbp
  uprobes/core: Make order of function parameters consistent across functions
  uprobes/core: Make macro names consistent
  uprobes: Update copyright notices
  uprobes/core: Move insn to arch specific structure
  uprobes/core: Remove uprobe_opcode_sz
  uprobes/core: Make instruction tables volatile
  uprobes: Move to kernel/events/
  uprobes/core: Clean up, refactor and improve the code
  ...
This commit is contained in:
Linus Torvalds 2012-05-24 11:39:34 -07:00
commit 654443e20d
30 changed files with 5053 additions and 984 deletions

View file

@ -12,6 +12,7 @@
#include <linux/completion.h>
#include <linux/cpumask.h>
#include <linux/page-debug-flags.h>
#include <linux/uprobes.h>
#include <asm/page.h>
#include <asm/mmu.h>
@ -388,6 +389,7 @@ struct mm_struct {
#ifdef CONFIG_CPUMASK_OFFSTACK
struct cpumask cpumask_allocation;
#endif
struct uprobes_state uprobes_state;
};
static inline void mm_init_cpumask(struct mm_struct *mm)

View file

@ -1572,6 +1572,10 @@ struct task_struct {
#ifdef CONFIG_HAVE_HW_BREAKPOINT
atomic_t ptrace_bp_refcnt;
#endif
#ifdef CONFIG_UPROBES
struct uprobe_task *utask;
int uprobe_srcu_id;
#endif
};
/* Future-safe accessor for struct task_struct's cpus_allowed. */

165
include/linux/uprobes.h Normal file
View file

@ -0,0 +1,165 @@
#ifndef _LINUX_UPROBES_H
#define _LINUX_UPROBES_H
/*
* User-space Probes (UProbes)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) IBM Corporation, 2008-2012
* Authors:
* Srikar Dronamraju
* Jim Keniston
* Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
*/
#include <linux/errno.h>
#include <linux/rbtree.h>
struct vm_area_struct;
struct mm_struct;
struct inode;
#ifdef CONFIG_ARCH_SUPPORTS_UPROBES
# include <asm/uprobes.h>
#endif
/* flags that denote/change uprobes behaviour */
/* Have a copy of original instruction */
#define UPROBE_COPY_INSN 0x1
/* Dont run handlers when first register/ last unregister in progress*/
#define UPROBE_RUN_HANDLER 0x2
/* Can skip singlestep */
#define UPROBE_SKIP_SSTEP 0x4
struct uprobe_consumer {
int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs);
/*
* filter is optional; If a filter exists, handler is run
* if and only if filter returns true.
*/
bool (*filter)(struct uprobe_consumer *self, struct task_struct *task);
struct uprobe_consumer *next;
};
#ifdef CONFIG_UPROBES
enum uprobe_task_state {
UTASK_RUNNING,
UTASK_BP_HIT,
UTASK_SSTEP,
UTASK_SSTEP_ACK,
UTASK_SSTEP_TRAPPED,
};
/*
* uprobe_task: Metadata of a task while it singlesteps.
*/
struct uprobe_task {
enum uprobe_task_state state;
struct arch_uprobe_task autask;
struct uprobe *active_uprobe;
unsigned long xol_vaddr;
unsigned long vaddr;
};
/*
* On a breakpoint hit, thread contests for a slot. It frees the
* slot after singlestep. Currently a fixed number of slots are
* allocated.
*/
struct xol_area {
wait_queue_head_t wq; /* if all slots are busy */
atomic_t slot_count; /* number of in-use slots */
unsigned long *bitmap; /* 0 = free slot */
struct page *page;
/*
* We keep the vma's vm_start rather than a pointer to the vma
* itself. The probed process or a naughty kernel module could make
* the vma go away, and we must handle that reasonably gracefully.
*/
unsigned long vaddr; /* Page(s) of instruction slots */
};
struct uprobes_state {
struct xol_area *xol_area;
atomic_t count;
};
extern int __weak set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr, bool verify);
extern bool __weak is_swbp_insn(uprobe_opcode_t *insn);
extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
extern int uprobe_mmap(struct vm_area_struct *vma);
extern void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end);
extern void uprobe_free_utask(struct task_struct *t);
extern void uprobe_copy_process(struct task_struct *t);
extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs);
extern int uprobe_post_sstep_notifier(struct pt_regs *regs);
extern int uprobe_pre_sstep_notifier(struct pt_regs *regs);
extern void uprobe_notify_resume(struct pt_regs *regs);
extern bool uprobe_deny_signal(void);
extern bool __weak arch_uprobe_skip_sstep(struct arch_uprobe *aup, struct pt_regs *regs);
extern void uprobe_clear_state(struct mm_struct *mm);
extern void uprobe_reset_state(struct mm_struct *mm);
#else /* !CONFIG_UPROBES */
struct uprobes_state {
};
static inline int
uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
{
return -ENOSYS;
}
static inline void
uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
{
}
static inline int uprobe_mmap(struct vm_area_struct *vma)
{
return 0;
}
static inline void
uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end)
{
}
static inline void uprobe_notify_resume(struct pt_regs *regs)
{
}
static inline bool uprobe_deny_signal(void)
{
return false;
}
static inline unsigned long uprobe_get_swbp_addr(struct pt_regs *regs)
{
return 0;
}
static inline void uprobe_free_utask(struct task_struct *t)
{
}
static inline void uprobe_copy_process(struct task_struct *t)
{
}
static inline void uprobe_clear_state(struct mm_struct *mm)
{
}
static inline void uprobe_reset_state(struct mm_struct *mm)
{
}
#endif /* !CONFIG_UPROBES */
#endif /* _LINUX_UPROBES_H */