linux_dsm_epyc7002/tools/perf/util/machine.h

224 lines
7.3 KiB
C
Raw Normal View History

#ifndef __PERF_MACHINE_H
#define __PERF_MACHINE_H
#include <sys/types.h>
#include <linux/rbtree.h>
#include "map.h"
#include "event.h"
perf report/top: Add option to collapse undesired parts of call graph For example, in an application with an expensive function implemented with deeply nested recursive calls, the default call-graph presentation is dominated by the different callchains within that function. By ignoring these callees, we can collect the callchains leading into the function and compactly identify what to blame for expensive calls. For example, in this report the callers of garbage_collect() are scattered across the tree: $ perf report -d ruby 2>- | grep -m10 ^[^#]*[a-z] 22.03% ruby [.] gc_mark --- gc_mark |--59.40%-- mark_keyvalue | st_foreach | gc_mark_children | |--99.75%-- rb_gc_mark | | rb_vm_mark | | gc_mark_children | | gc_marks | | |--99.00%-- garbage_collect If we ignore the callees of garbage_collect(), its callers are coalesced: $ perf report --ignore-callees garbage_collect -d ruby 2>- | grep -m10 ^[^#]*[a-z] 72.92% ruby [.] garbage_collect --- garbage_collect vm_xmalloc |--47.08%-- ruby_xmalloc | st_insert2 | rb_hash_aset | |--98.45%-- features_index_add | | rb_provide_feature | | rb_require_safe | | vm_call_method Signed-off-by: Greg Price <price@mit.edu> Tested-by: Jiri Olsa <jolsa@redhat.com> Cc: David Ahern <dsahern@gmail.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Link: http://lkml.kernel.org/r/20130623031720.GW22203@biohazard-cafe.mit.edu Link: http://lkml.kernel.org/r/20130708115746.GO22203@biohazard-cafe.mit.edu Cc: Fengguang Wu <fengguang.wu@intel.com> [ remove spaces at beginning of line, reported by Fengguang Wu ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2012-12-07 12:48:05 +07:00
struct addr_location;
struct branch_stack;
struct perf_evsel;
struct perf_sample;
struct symbol;
struct thread;
union perf_event;
/* Native host kernel uses -1 as pid index in machine */
#define HOST_KERNEL_ID (-1)
#define DEFAULT_GUEST_KERNEL_ID (0)
extern const char *ref_reloc_sym_names[];
struct vdso_info;
struct machine {
struct rb_node rb_node;
pid_t pid;
u16 id_hdr_size;
bool comm_exec;
char *root_dir;
struct rb_root threads;
struct list_head dead_threads;
struct thread *last_match;
struct vdso_info *vdso_info;
struct list_head user_dsos;
struct list_head kernel_dsos;
struct map_groups kmaps;
struct map *vmlinux_maps[MAP__NR_TYPES];
u64 kernel_start;
symbol_filter_t symbol_filter;
pid_t *current_tid;
};
static inline
struct map *machine__kernel_map(struct machine *machine, enum map_type type)
{
return machine->vmlinux_maps[type];
}
int machine__get_kernel_start(struct machine *machine);
static inline u64 machine__kernel_start(struct machine *machine)
{
if (!machine->kernel_start)
machine__get_kernel_start(machine);
return machine->kernel_start;
}
static inline bool machine__kernel_ip(struct machine *machine, u64 ip)
{
u64 kernel_start = machine__kernel_start(machine);
return ip >= kernel_start;
}
struct thread *machine__find_thread(struct machine *machine, pid_t pid,
pid_t tid);
struct comm *machine__thread_exec_comm(struct machine *machine,
struct thread *thread);
int machine__process_comm_event(struct machine *machine, union perf_event *event,
struct perf_sample *sample);
int machine__process_exit_event(struct machine *machine, union perf_event *event,
struct perf_sample *sample);
int machine__process_fork_event(struct machine *machine, union perf_event *event,
struct perf_sample *sample);
int machine__process_lost_event(struct machine *machine, union perf_event *event,
struct perf_sample *sample);
int machine__process_mmap_event(struct machine *machine, union perf_event *event,
struct perf_sample *sample);
int machine__process_mmap2_event(struct machine *machine, union perf_event *event,
struct perf_sample *sample);
int machine__process_event(struct machine *machine, union perf_event *event,
struct perf_sample *sample);
typedef void (*machine__process_t)(struct machine *machine, void *data);
struct machines {
struct machine host;
struct rb_root guests;
symbol_filter_t symbol_filter;
};
void machines__init(struct machines *machines);
void machines__exit(struct machines *machines);
void machines__process_guests(struct machines *machines,
machine__process_t process, void *data);
struct machine *machines__add(struct machines *machines, pid_t pid,
const char *root_dir);
struct machine *machines__find_host(struct machines *machines);
struct machine *machines__find(struct machines *machines, pid_t pid);
struct machine *machines__findnew(struct machines *machines, pid_t pid);
void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size);
char *machine__mmap_name(struct machine *machine, char *bf, size_t size);
void machines__set_symbol_filter(struct machines *machines,
symbol_filter_t symbol_filter);
void machines__set_comm_exec(struct machines *machines, bool comm_exec);
struct machine *machine__new_host(void);
int machine__init(struct machine *machine, const char *root_dir, pid_t pid);
void machine__exit(struct machine *machine);
void machine__delete_dead_threads(struct machine *machine);
void machine__delete_threads(struct machine *machine);
void machine__delete(struct machine *machine);
struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
struct addr_location *al);
struct mem_info *sample__resolve_mem(struct perf_sample *sample,
struct addr_location *al);
int machine__resolve_callchain(struct machine *machine,
struct perf_evsel *evsel,
struct thread *thread,
struct perf_sample *sample,
perf report/top: Add option to collapse undesired parts of call graph For example, in an application with an expensive function implemented with deeply nested recursive calls, the default call-graph presentation is dominated by the different callchains within that function. By ignoring these callees, we can collect the callchains leading into the function and compactly identify what to blame for expensive calls. For example, in this report the callers of garbage_collect() are scattered across the tree: $ perf report -d ruby 2>- | grep -m10 ^[^#]*[a-z] 22.03% ruby [.] gc_mark --- gc_mark |--59.40%-- mark_keyvalue | st_foreach | gc_mark_children | |--99.75%-- rb_gc_mark | | rb_vm_mark | | gc_mark_children | | gc_marks | | |--99.00%-- garbage_collect If we ignore the callees of garbage_collect(), its callers are coalesced: $ perf report --ignore-callees garbage_collect -d ruby 2>- | grep -m10 ^[^#]*[a-z] 72.92% ruby [.] garbage_collect --- garbage_collect vm_xmalloc |--47.08%-- ruby_xmalloc | st_insert2 | rb_hash_aset | |--98.45%-- features_index_add | | rb_provide_feature | | rb_require_safe | | vm_call_method Signed-off-by: Greg Price <price@mit.edu> Tested-by: Jiri Olsa <jolsa@redhat.com> Cc: David Ahern <dsahern@gmail.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Link: http://lkml.kernel.org/r/20130623031720.GW22203@biohazard-cafe.mit.edu Link: http://lkml.kernel.org/r/20130708115746.GO22203@biohazard-cafe.mit.edu Cc: Fengguang Wu <fengguang.wu@intel.com> [ remove spaces at beginning of line, reported by Fengguang Wu ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2012-12-07 12:48:05 +07:00
struct symbol **parent,
perf report: Add --max-stack option to limit callchain stack scan When callgraph data was included in the perf data file, it may take a long time to scan all those data and merge them together especially if the stored callchains are long and the perf data file itself is large, like a Gbyte or so. The callchain stack is currently limited to PERF_MAX_STACK_DEPTH (127). This is a large value. Usually the callgraph data that developers are most interested in are the first few levels, the rests are usually not looked at. This patch adds a new --max-stack option to perf-report to limit the depth of callchain stack data to look at to reduce the time it takes for perf-report to finish its processing. It trades the presence of trailing stack information with faster speed. The following table shows the elapsed time of doing perf-report on a perf.data file of size 985,531,828 bytes. --max_stack Elapsed Time Output data size ----------- ------------ ---------------- not set 88.0s 124,422,651 64 87.5s 116,303,213 32 87.2s 112,023,804 16 86.6s 94,326,380 8 59.9s 33,697,248 4 40.7s 10,116,637 -g none 27.1s 2,555,810 Signed-off-by: Waiman Long <Waiman.Long@hp.com> Acked-by: David Ahern <dsahern@gmail.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Aswin Chandramouleeswaran <aswin@hp.com> Cc: David Ahern <dsahern@gmail.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Scott J Norton <scott.norton@hp.com> Cc: Stephane Eranian <eranian@google.com> Link: http://lkml.kernel.org/r/1382107129-2010-4-git-send-email-Waiman.Long@hp.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2013-10-18 21:38:48 +07:00
struct addr_location *root_al,
int max_stack);
/*
* Default guest kernel is defined by parameter --guestkallsyms
* and --guestmodules
*/
static inline bool machine__is_default_guest(struct machine *machine)
{
return machine ? machine->pid == DEFAULT_GUEST_KERNEL_ID : false;
}
static inline bool machine__is_host(struct machine *machine)
{
return machine ? machine->pid == HOST_KERNEL_ID : false;
}
struct thread *machine__findnew_thread(struct machine *machine, pid_t pid,
pid_t tid);
size_t machine__fprintf(struct machine *machine, FILE *fp);
static inline
struct symbol *machine__find_kernel_symbol(struct machine *machine,
enum map_type type, u64 addr,
struct map **mapp,
symbol_filter_t filter)
{
return map_groups__find_symbol(&machine->kmaps, type, addr,
mapp, filter);
}
static inline
struct symbol *machine__find_kernel_function(struct machine *machine, u64 addr,
struct map **mapp,
symbol_filter_t filter)
{
return machine__find_kernel_symbol(machine, MAP__FUNCTION, addr,
mapp, filter);
}
static inline
struct symbol *machine__find_kernel_function_by_name(struct machine *machine,
const char *name,
struct map **mapp,
symbol_filter_t filter)
{
return map_groups__find_function_by_name(&machine->kmaps, name, mapp,
filter);
}
struct map *machine__new_module(struct machine *machine, u64 start,
const char *filename);
int machine__load_kallsyms(struct machine *machine, const char *filename,
enum map_type type, symbol_filter_t filter);
int machine__load_vmlinux_path(struct machine *machine, enum map_type type,
symbol_filter_t filter);
size_t machine__fprintf_dsos_buildid(struct machine *machine, FILE *fp,
bool (skip)(struct dso *dso, int parm), int parm);
size_t machines__fprintf_dsos(struct machines *machines, FILE *fp);
size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp,
bool (skip)(struct dso *dso, int parm), int parm);
void machine__destroy_kernel_maps(struct machine *machine);
int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel);
int machine__create_kernel_maps(struct machine *machine);
int machines__create_kernel_maps(struct machines *machines, pid_t pid);
int machines__create_guest_kernel_maps(struct machines *machines);
void machines__destroy_kernel_maps(struct machines *machines);
size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp);
int machine__for_each_thread(struct machine *machine,
int (*fn)(struct thread *thread, void *p),
void *priv);
int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
struct target *target, struct thread_map *threads,
perf_event__handler_t process, bool data_mmap);
static inline
int machine__synthesize_threads(struct machine *machine, struct target *target,
struct thread_map *threads, bool data_mmap)
{
return __machine__synthesize_threads(machine, NULL, target, threads,
perf_event__process, data_mmap);
}
pid_t machine__get_current_tid(struct machine *machine, int cpu);
int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid,
pid_t tid);
#endif /* __PERF_MACHINE_H */