When saving the callchain on Power, the kernel conservatively saves excess
entries in the callchain. A few of these entries are needed in some cases
but not others. We should use the DWARF debug information to determine
when the entries are needed.
E.g., the value in the link register (LR) is needed only when it holds the
return address of a function. At other times it must be ignored.
If the unnecessary entries are not ignored, we end up with duplicate arcs
in the call-graphs.
Use the DWARF debug information to determine if any callchain entries
should be ignored when building call-graphs.
Callgraph before the patch:
14.67% 2234 sprintft libc-2.18.so [.] __random
|
--- __random
|
|--61.12%-- __random
| |
| |--97.15%-- rand
| | do_my_sprintf
| | main
| | generic_start_main.isra.0
| | __libc_start_main
| | 0x0
| |
| --2.85%-- do_my_sprintf
| main
| generic_start_main.isra.0
| __libc_start_main
| 0x0
|
--38.88%-- rand
|
|--94.01%-- rand
| do_my_sprintf
| main
| generic_start_main.isra.0
| __libc_start_main
| 0x0
|
--5.99%-- do_my_sprintf
main
generic_start_main.isra.0
__libc_start_main
0x0
Callgraph after the patch:
14.67% 2234 sprintft libc-2.18.so [.] __random
|
--- __random
|
|--95.93%-- rand
| do_my_sprintf
| main
| generic_start_main.isra.0
| __libc_start_main
| 0x0
|
--4.07%-- do_my_sprintf
main
generic_start_main.isra.0
__libc_start_main
0x0
TODO: For split-debug info objects like glibc, we can determine the
call-frame address (CFA) only when both the .eh_frame and .debug_info
sections are available. We should be able to determine the CFA
even without the .eh_frame section.
Fix suggested by Anton Blanchard.
Thanks to valuable input on DWARF debug information from Ulrich Weigand.
Reported-by: Maynard Johnson <maynard@us.ibm.com>
Tested-by: Maynard Johnson <maynard@us.ibm.com>
Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/20140625154903.GA29607@us.ibm.com
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
192 lines
4.6 KiB
C
192 lines
4.6 KiB
C
#ifndef __PERF_CALLCHAIN_H
|
|
#define __PERF_CALLCHAIN_H
|
|
|
|
#include "../perf.h"
|
|
#include <linux/list.h>
|
|
#include <linux/rbtree.h>
|
|
#include "event.h"
|
|
#include "symbol.h"
|
|
|
|
/*
 * Call-graph modes.
 *
 * CALLCHAIN_MAX is not a mode of its own: it follows the last real mode,
 * so its value equals the number of modes listed before it.
 */
enum perf_call_graph_mode {
	CALLCHAIN_NONE,
	CALLCHAIN_FP,		/* presumably frame-pointer based; confirm at the users */
	CALLCHAIN_DWARF,	/* presumably DWARF unwind based; confirm at the users */
	CALLCHAIN_MAX
};
|
|
|
|
/*
 * Callchain mode, as stored in the `mode` member of struct callchain_param.
 * NOTE(review): the exact output produced by each mode is decided by the
 * code consuming this value, which is not part of this header.
 */
enum chain_mode {
	CHAIN_NONE,
	CHAIN_FLAT,
	CHAIN_GRAPH_ABS,
	CHAIN_GRAPH_REL
};
|
|
|
|
/*
 * Callchain ordering, as stored in the `order` member of
 * struct callchain_param.
 */
enum chain_order {
	ORDER_CALLER,
	ORDER_CALLEE
};
|
|
|
|
struct callchain_node {
|
|
struct callchain_node *parent;
|
|
struct list_head val;
|
|
struct rb_node rb_node_in; /* to insert nodes in an rbtree */
|
|
struct rb_node rb_node; /* to sort nodes in an output tree */
|
|
struct rb_root rb_root_in; /* input tree of children */
|
|
struct rb_root rb_root; /* sorted output tree of children */
|
|
unsigned int val_nr;
|
|
u64 hit;
|
|
u64 children_hit;
|
|
};
|
|
|
|
struct callchain_root {
|
|
u64 max_depth;
|
|
struct callchain_node node;
|
|
};
|
|
|
|
struct callchain_param;
|
|
|
|
typedef void (*sort_chain_func_t)(struct rb_root *, struct callchain_root *,
|
|
u64, struct callchain_param *);
|
|
|
|
/*
 * Callchain key, as stored in the `key` member of struct callchain_param.
 * Presumably selects whether entries are keyed by function or by address;
 * confirm at the users.
 */
enum chain_key {
	CCKEY_FUNCTION,
	CCKEY_ADDRESS
};
|
|
|
|
struct callchain_param {
|
|
enum chain_mode mode;
|
|
u32 print_limit;
|
|
double min_percent;
|
|
sort_chain_func_t sort;
|
|
enum chain_order order;
|
|
enum chain_key key;
|
|
};
|
|
|
|
struct callchain_list {
|
|
u64 ip;
|
|
struct map_symbol ms;
|
|
struct list_head list;
|
|
};
|
|
|
|
/*
|
|
* A callchain cursor is a single linked list that
|
|
* let one feed a callchain progressively.
|
|
* It keeps persistent allocated entries to minimize
|
|
* allocations.
|
|
*/
|
|
struct callchain_cursor_node {
|
|
u64 ip;
|
|
struct map *map;
|
|
struct symbol *sym;
|
|
struct callchain_cursor_node *next;
|
|
};
|
|
|
|
struct callchain_cursor {
|
|
u64 nr;
|
|
struct callchain_cursor_node *first;
|
|
struct callchain_cursor_node **last;
|
|
u64 pos;
|
|
struct callchain_cursor_node *curr;
|
|
};
|
|
|
|
/* Thread-local (__thread) callchain cursor; only declared here, the definition lives elsewhere. */
extern __thread struct callchain_cursor callchain_cursor;
|
|
|
|
static inline void callchain_init(struct callchain_root *root)
|
|
{
|
|
INIT_LIST_HEAD(&root->node.val);
|
|
|
|
root->node.parent = NULL;
|
|
root->node.hit = 0;
|
|
root->node.children_hit = 0;
|
|
root->node.rb_root_in = RB_ROOT;
|
|
root->max_depth = 0;
|
|
}
|
|
|
|
static inline u64 callchain_cumul_hits(struct callchain_node *node)
|
|
{
|
|
return node->hit + node->children_hit;
|
|
}
|
|
|
|
int callchain_register_param(struct callchain_param *param);
|
|
int callchain_append(struct callchain_root *root,
|
|
struct callchain_cursor *cursor,
|
|
u64 period);
|
|
|
|
int callchain_merge(struct callchain_cursor *cursor,
|
|
struct callchain_root *dst, struct callchain_root *src);
|
|
|
|
/*
|
|
* Initialize a cursor before adding entries inside, but keep
|
|
* the previously allocated entries as a cache.
|
|
*/
|
|
static inline void callchain_cursor_reset(struct callchain_cursor *cursor)
|
|
{
|
|
cursor->nr = 0;
|
|
cursor->last = &cursor->first;
|
|
}
|
|
|
|
/*
 * Append one (ip, map, sym) entry to the cursor; implemented outside this
 * header.  NOTE(review): return-value convention not visible here.
 */
int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip,
			    struct map *map, struct symbol *sym);
|
|
|
|
/* Close a cursor writing session. Initialize for the reader */
|
|
static inline void callchain_cursor_commit(struct callchain_cursor *cursor)
|
|
{
|
|
cursor->curr = cursor->first;
|
|
cursor->pos = 0;
|
|
}
|
|
|
|
/* Cursor reading iteration helpers */
|
|
static inline struct callchain_cursor_node *
|
|
callchain_cursor_current(struct callchain_cursor *cursor)
|
|
{
|
|
if (cursor->pos == cursor->nr)
|
|
return NULL;
|
|
|
|
return cursor->curr;
|
|
}
|
|
|
|
static inline void callchain_cursor_advance(struct callchain_cursor *cursor)
|
|
{
|
|
cursor->curr = cursor->curr->next;
|
|
cursor->pos++;
|
|
}
|
|
|
|
struct option;
struct hist_entry;

/*
 * Callchain option parsing, implemented outside this header.  The two
 * functions taking (opt, arg, unset) share one signature.
 * NOTE(review): accepted argument syntax and return values are defined by
 * the implementations, not visible here.
 */
int record_parse_callchain(const char *arg, struct record_opts *opts);
int record_parse_callchain_opt(const struct option *opt, const char *arg, int unset);
int record_callchain_opt(const struct option *opt, const char *arg, int unset);
|
|
|
|
/*
 * Sample / hist-entry callchain helpers, implemented outside this header.
 * NOTE(review): semantics and return values are defined by the
 * implementations, not visible here.
 */
int sample__resolve_callchain(struct perf_sample *sample, struct symbol **parent,
			      struct perf_evsel *evsel, struct addr_location *al,
			      int max_stack);
int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *sample);
int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node,
			bool hide_unresolved);
|
|
|
|
/* Help string, by its name for the record callchain option; defined elsewhere. */
extern const char record_callchain_help[];
/* Parse a report-side callchain option string; implemented outside this header. */
int parse_callchain_report_opt(const char *arg);
|
|
|
|
static inline void callchain_cursor_snapshot(struct callchain_cursor *dest,
|
|
struct callchain_cursor *src)
|
|
{
|
|
*dest = *src;
|
|
|
|
dest->first = src->curr;
|
|
dest->nr -= src->pos;
|
|
}
|
|
|
|
#ifdef HAVE_SKIP_CALLCHAIN_IDX
|
|
extern int arch_skip_callchain_idx(struct machine *machine,
|
|
struct thread *thread, struct ip_callchain *chain);
|
|
#else
|
|
static inline int arch_skip_callchain_idx(struct machine *machine __maybe_unused,
|
|
struct thread *thread __maybe_unused,
|
|
struct ip_callchain *chain __maybe_unused)
|
|
{
|
|
return -1;
|
|
}
|
|
#endif
|
|
|
|
#endif /* __PERF_CALLCHAIN_H */
|