Commit 88a57667f2990f00b019d46c8426441c9e516d51

Authored by Linus Torvalds

Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf fixes and cleanups from Ingo Molnar:
 "A kernel fix plus mostly tooling fixes, but also some tooling
  restructuring and cleanups"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (39 commits)
  perf: Fix building warning on ARM 32
  perf symbols: Fix use after free in filename__read_build_id
  perf evlist: Use roundup_pow_of_two
  tools: Adopt roundup_pow_of_two
  perf tools: Make the mmap length autotuning more robust
  tools: Adopt rounddown_pow_of_two and deps
  tools: Adopt fls_long and deps
  tools: Move bitops.h from tools/perf/util to tools/
  tools: Introduce asm-generic/bitops.h
  tools lib: Move asm-generic/bitops/find.h code to tools/include and tools/lib
  tools: Whitespace prep patches for moving bitops.h
  tools: Move code originally from asm-generic/atomic.h into tools/include/asm-generic/
  tools: Move code originally from linux/log2.h to tools/include/linux/
  tools: Move __ffs implementation to tools/include/asm-generic/bitops/__ffs.h
  perf evlist: Do not use hard coded value for a mmap_pages default
  perf trace: Let the perf_evlist__mmap autosize the number of pages to use
  perf evlist: Improve the strerror_mmap method
  perf evlist: Clarify sterror_mmap variable names
  perf evlist: Fixup brown paper bag on "hint" for --mmap-pages cmdline arg
  perf trace: Provide a better explanation when mmap fails
  ...

Showing 41 changed files Side-by-side Diff

arch/x86/kernel/cpu/perf_event_intel_uncore.c
... ... @@ -276,6 +276,17 @@
276 276 return box;
277 277 }
278 278  
  279 +/*
  280 + * An event belongs to an uncore PMU if and only if its pmu->event_init
  281 + * callback is uncore_pmu_event_init, so compare against it to detect them.
  282 + */
  283 +static int uncore_pmu_event_init(struct perf_event *event);
  284 +
  285 +static bool is_uncore_event(struct perf_event *event)
  286 +{
  287 + return event->pmu->event_init == uncore_pmu_event_init;
  288 +}
  289 +
279 290 static int
280 291 uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, bool dogrp)
281 292 {
282 293  
... ... @@ -290,13 +301,18 @@
290 301 return -EINVAL;
291 302  
292 303 n = box->n_events;
293   - box->event_list[n] = leader;
294   - n++;
  304 +
  305 + if (is_uncore_event(leader)) {
  306 + box->event_list[n] = leader;
  307 + n++;
  308 + }
  309 +
295 310 if (!dogrp)
296 311 return n;
297 312  
298 313 list_for_each_entry(event, &leader->sibling_list, group_entry) {
299   - if (event->state <= PERF_EVENT_STATE_OFF)
  314 + if (!is_uncore_event(event) ||
  315 + event->state <= PERF_EVENT_STATE_OFF)
300 316 continue;
301 317  
302 318 if (n >= max_count)
kernel/events/core.c
... ... @@ -7477,11 +7477,11 @@
7477 7477  
7478 7478 if (move_group) {
7479 7479 synchronize_rcu();
7480   - perf_install_in_context(ctx, group_leader, event->cpu);
  7480 + perf_install_in_context(ctx, group_leader, group_leader->cpu);
7481 7481 get_ctx(ctx);
7482 7482 list_for_each_entry(sibling, &group_leader->sibling_list,
7483 7483 group_entry) {
7484   - perf_install_in_context(ctx, sibling, event->cpu);
  7484 + perf_install_in_context(ctx, sibling, sibling->cpu);
7485 7485 get_ctx(ctx);
7486 7486 }
7487 7487 }
scripts/kconfig/mconf.c
... ... @@ -330,10 +330,10 @@
330 330 list_for_each_entry(sp, &trail, entries) {
331 331 if (sp->text) {
332 332 if (pos) {
333   - pos->next = xcalloc(sizeof(*pos), 1);
  333 + pos->next = xcalloc(1, sizeof(*pos));
334 334 pos = pos->next;
335 335 } else {
336   - subtitles = pos = xcalloc(sizeof(*pos), 1);
  336 + subtitles = pos = xcalloc(1, sizeof(*pos));
337 337 }
338 338 pos->text = sp->text;
339 339 }
tools/include/asm-generic/bitops.h
  1 +#ifndef __TOOLS_ASM_GENERIC_BITOPS_H
  2 +#define __TOOLS_ASM_GENERIC_BITOPS_H
  3 +
  4 +/*
  5 + * tools/ copied this from include/asm-generic/bitops.h, bit by bit as it needed
  6 + * some functions.
  7 + *
  8 + * For the benefit of those who are trying to port Linux to another
  9 + * architecture, here are some C-language equivalents. You should
  10 + * recode these in the native assembly language, if at all possible.
  11 + *
  12 + * C language equivalents written by Theodore Ts'o, 9/26/92
  13 + */
  14 +
  15 +#include <asm-generic/bitops/__ffs.h>
  16 +#include <asm-generic/bitops/fls.h>
  17 +#include <asm-generic/bitops/__fls.h>
  18 +#include <asm-generic/bitops/fls64.h>
  19 +#include <asm-generic/bitops/find.h>
  20 +
  21 +#ifndef _TOOLS_LINUX_BITOPS_H_
  22 +#error only <linux/bitops.h> can be included directly
  23 +#endif
  24 +
  25 +#include <asm-generic/bitops/atomic.h>
  26 +
  27 +#endif /* __TOOLS_ASM_GENERIC_BITOPS_H */
tools/include/asm-generic/bitops/__ffs.h
  1 +#ifndef _TOOLS_LINUX_ASM_GENERIC_BITOPS___FFS_H_
  2 +#define _TOOLS_LINUX_ASM_GENERIC_BITOPS___FFS_H_
  3 +
  4 +#include <asm/types.h>
  5 +
  6 +/**
  7 + * __ffs - find first bit in word.
  8 + * @word: The word to search
  9 + *
  10 + * Undefined if no bit exists, so code should check against 0 first.
  11 + */
  12 +static __always_inline unsigned long __ffs(unsigned long word)
  13 +{
  14 + int num = 0;
  15 +
  16 +#if __BITS_PER_LONG == 64
  17 + if ((word & 0xffffffff) == 0) {
  18 + num += 32;
  19 + word >>= 32;
  20 + }
  21 +#endif
  22 + if ((word & 0xffff) == 0) {
  23 + num += 16;
  24 + word >>= 16;
  25 + }
  26 + if ((word & 0xff) == 0) {
  27 + num += 8;
  28 + word >>= 8;
  29 + }
  30 + if ((word & 0xf) == 0) {
  31 + num += 4;
  32 + word >>= 4;
  33 + }
  34 + if ((word & 0x3) == 0) {
  35 + num += 2;
  36 + word >>= 2;
  37 + }
  38 + if ((word & 0x1) == 0)
  39 + num += 1;
  40 + return num;
  41 +}
  42 +
  43 +#endif /* _TOOLS_LINUX_ASM_GENERIC_BITOPS___FFS_H_ */
tools/include/asm-generic/bitops/__fls.h
  1 +#include <../../../../include/asm-generic/bitops/__fls.h>
tools/include/asm-generic/bitops/atomic.h
  1 +#ifndef _TOOLS_LINUX_ASM_GENERIC_BITOPS_ATOMIC_H_
  2 +#define _TOOLS_LINUX_ASM_GENERIC_BITOPS_ATOMIC_H_
  3 +
  4 +#include <asm/types.h>
  5 +
  6 +static inline void set_bit(int nr, unsigned long *addr)
  7 +{
  8 + addr[nr / __BITS_PER_LONG] |= 1UL << (nr % __BITS_PER_LONG);
  9 +}
  10 +
  11 +static inline void clear_bit(int nr, unsigned long *addr)
  12 +{
  13 + addr[nr / __BITS_PER_LONG] &= ~(1UL << (nr % __BITS_PER_LONG));
  14 +}
  15 +
  16 +static __always_inline int test_bit(unsigned int nr, const unsigned long *addr)
  17 +{
  18 + return ((1UL << (nr % __BITS_PER_LONG)) &
  19 + (((unsigned long *)addr)[nr / __BITS_PER_LONG])) != 0;
  20 +}
  21 +
  22 +#endif /* _TOOLS_LINUX_ASM_GENERIC_BITOPS_ATOMIC_H_ */
tools/include/asm-generic/bitops/find.h
  1 +#ifndef _TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_
  2 +#define _TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_
  3 +
  4 +#ifndef find_next_bit
  5 +/**
  6 + * find_next_bit - find the next set bit in a memory region
  7 + * @addr: The address to base the search on
  8 + * @offset: The bitnumber to start searching at
  9 + * @size: The bitmap size in bits
  10 + *
  11 + * Returns the bit number for the next set bit
  12 + * If no bits are set, returns @size.
  13 + */
  14 +extern unsigned long find_next_bit(const unsigned long *addr, unsigned long
  15 + size, unsigned long offset);
  16 +#endif
  17 +
  18 +#ifndef find_first_bit
  19 +
  20 +/**
  21 + * find_first_bit - find the first set bit in a memory region
  22 + * @addr: The address to start the search at
  23 + * @size: The maximum number of bits to search
  24 + *
  25 + * Returns the bit number of the first set bit.
  26 + * If no bits are set, returns @size.
  27 + */
  28 +extern unsigned long find_first_bit(const unsigned long *addr,
  29 + unsigned long size);
  30 +
  31 +#endif /* find_first_bit */
  32 +
  33 +#endif /*_TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_ */
tools/include/asm-generic/bitops/fls.h
  1 +#include <../../../../include/asm-generic/bitops/fls.h>
tools/include/asm-generic/bitops/fls64.h
  1 +#include <../../../../include/asm-generic/bitops/fls64.h>
tools/include/linux/bitops.h
  1 +#ifndef _TOOLS_LINUX_BITOPS_H_
  2 +#define _TOOLS_LINUX_BITOPS_H_
  3 +
  4 +#include <linux/kernel.h>
  5 +#include <linux/compiler.h>
  6 +#include <asm/hweight.h>
  7 +
  8 +#ifndef __WORDSIZE
  9 +#define __WORDSIZE (__SIZEOF_LONG__ * 8)
  10 +#endif
  11 +
  12 +#define BITS_PER_LONG __WORDSIZE
  13 +
  14 +#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG))
  15 +#define BIT_WORD(nr) ((nr) / BITS_PER_LONG)
  16 +#define BITS_PER_BYTE 8
  17 +#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))
  18 +#define BITS_TO_U64(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u64))
  19 +#define BITS_TO_U32(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u32))
  20 +#define BITS_TO_BYTES(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE)
  21 +
  22 +/*
  23 + * Include this here because some architectures need generic_ffs/fls in
  24 + * scope
  25 + *
  26 + * XXX: this needs to be asm/bitops.h, when we get to per arch optimizations
  27 + */
  28 +#include <asm-generic/bitops.h>
  29 +
  30 +#define for_each_set_bit(bit, addr, size) \
  31 + for ((bit) = find_first_bit((addr), (size)); \
  32 + (bit) < (size); \
  33 + (bit) = find_next_bit((addr), (size), (bit) + 1))
  34 +
  35 +/* same as for_each_set_bit() but use bit as value to start with */
  36 +#define for_each_set_bit_from(bit, addr, size) \
  37 + for ((bit) = find_next_bit((addr), (size), (bit)); \
  38 + (bit) < (size); \
  39 + (bit) = find_next_bit((addr), (size), (bit) + 1))
  40 +
  41 +static inline unsigned long hweight_long(unsigned long w)
  42 +{
  43 + return sizeof(w) == 4 ? hweight32(w) : hweight64(w);
  44 +}
  45 +
  46 +static inline unsigned fls_long(unsigned long l)
  47 +{
  48 + if (sizeof(l) == 4)
  49 + return fls(l);
  50 + return fls64(l);
  51 +}
  52 +
  53 +#endif
tools/include/linux/log2.h
  1 +/* Integer base 2 logarithm calculation
  2 + *
  3 + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
  4 + * Written by David Howells (dhowells@redhat.com)
  5 + *
  6 + * This program is free software; you can redistribute it and/or
  7 + * modify it under the terms of the GNU General Public License
  8 + * as published by the Free Software Foundation; either version
  9 + * 2 of the License, or (at your option) any later version.
  10 + */
  11 +
  12 +#ifndef _TOOLS_LINUX_LOG2_H
  13 +#define _TOOLS_LINUX_LOG2_H
  14 +
  15 +/*
  16 + * deal with unrepresentable constant logarithms
  17 + */
  18 +extern __attribute__((const, noreturn))
  19 +int ____ilog2_NaN(void);
  20 +
  21 +/*
  22 + * non-constant log of base 2 calculators
  23 + * - the arch may override these in asm/bitops.h if they can be implemented
  24 + * more efficiently than using fls() and fls64()
  25 + * - the arch is not required to handle n==0 if implementing the fallback
  26 + */
  27 +static inline __attribute__((const))
  28 +int __ilog2_u32(u32 n)
  29 +{
  30 + return fls(n) - 1;
  31 +}
  32 +
  33 +static inline __attribute__((const))
  34 +int __ilog2_u64(u64 n)
  35 +{
  36 + return fls64(n) - 1;
  37 +}
  38 +
  39 +/*
  40 + * Determine whether some value is a power of two, where zero is
  41 + * *not* considered a power of two.
  42 + */
  43 +
  44 +static inline __attribute__((const))
  45 +bool is_power_of_2(unsigned long n)
  46 +{
  47 + return (n != 0 && ((n & (n - 1)) == 0));
  48 +}
  49 +
  50 +/*
  51 + * round up to nearest power of two
  52 + */
  53 +static inline __attribute__((const))
  54 +unsigned long __roundup_pow_of_two(unsigned long n)
  55 +{
  56 + return 1UL << fls_long(n - 1);
  57 +}
  58 +
  59 +/*
  60 + * round down to nearest power of two
  61 + */
  62 +static inline __attribute__((const))
  63 +unsigned long __rounddown_pow_of_two(unsigned long n)
  64 +{
  65 + return 1UL << (fls_long(n) - 1);
  66 +}
  67 +
  68 +/**
  69 + * ilog2 - log of base 2 of 32-bit or a 64-bit unsigned value
  70 + * @n: parameter
  71 + *
  72 + * constant-capable log of base 2 calculation
  73 + * - this can be used to initialise global variables from constant data, hence
  74 + * the massive ternary operator construction
  75 + *
  76 + * selects the appropriately-sized optimised version depending on sizeof(n)
  77 + */
  78 +#define ilog2(n) \
  79 +( \
  80 + __builtin_constant_p(n) ? ( \
  81 + (n) < 1 ? ____ilog2_NaN() : \
  82 + (n) & (1ULL << 63) ? 63 : \
  83 + (n) & (1ULL << 62) ? 62 : \
  84 + (n) & (1ULL << 61) ? 61 : \
  85 + (n) & (1ULL << 60) ? 60 : \
  86 + (n) & (1ULL << 59) ? 59 : \
  87 + (n) & (1ULL << 58) ? 58 : \
  88 + (n) & (1ULL << 57) ? 57 : \
  89 + (n) & (1ULL << 56) ? 56 : \
  90 + (n) & (1ULL << 55) ? 55 : \
  91 + (n) & (1ULL << 54) ? 54 : \
  92 + (n) & (1ULL << 53) ? 53 : \
  93 + (n) & (1ULL << 52) ? 52 : \
  94 + (n) & (1ULL << 51) ? 51 : \
  95 + (n) & (1ULL << 50) ? 50 : \
  96 + (n) & (1ULL << 49) ? 49 : \
  97 + (n) & (1ULL << 48) ? 48 : \
  98 + (n) & (1ULL << 47) ? 47 : \
  99 + (n) & (1ULL << 46) ? 46 : \
  100 + (n) & (1ULL << 45) ? 45 : \
  101 + (n) & (1ULL << 44) ? 44 : \
  102 + (n) & (1ULL << 43) ? 43 : \
  103 + (n) & (1ULL << 42) ? 42 : \
  104 + (n) & (1ULL << 41) ? 41 : \
  105 + (n) & (1ULL << 40) ? 40 : \
  106 + (n) & (1ULL << 39) ? 39 : \
  107 + (n) & (1ULL << 38) ? 38 : \
  108 + (n) & (1ULL << 37) ? 37 : \
  109 + (n) & (1ULL << 36) ? 36 : \
  110 + (n) & (1ULL << 35) ? 35 : \
  111 + (n) & (1ULL << 34) ? 34 : \
  112 + (n) & (1ULL << 33) ? 33 : \
  113 + (n) & (1ULL << 32) ? 32 : \
  114 + (n) & (1ULL << 31) ? 31 : \
  115 + (n) & (1ULL << 30) ? 30 : \
  116 + (n) & (1ULL << 29) ? 29 : \
  117 + (n) & (1ULL << 28) ? 28 : \
  118 + (n) & (1ULL << 27) ? 27 : \
  119 + (n) & (1ULL << 26) ? 26 : \
  120 + (n) & (1ULL << 25) ? 25 : \
  121 + (n) & (1ULL << 24) ? 24 : \
  122 + (n) & (1ULL << 23) ? 23 : \
  123 + (n) & (1ULL << 22) ? 22 : \
  124 + (n) & (1ULL << 21) ? 21 : \
  125 + (n) & (1ULL << 20) ? 20 : \
  126 + (n) & (1ULL << 19) ? 19 : \
  127 + (n) & (1ULL << 18) ? 18 : \
  128 + (n) & (1ULL << 17) ? 17 : \
  129 + (n) & (1ULL << 16) ? 16 : \
  130 + (n) & (1ULL << 15) ? 15 : \
  131 + (n) & (1ULL << 14) ? 14 : \
  132 + (n) & (1ULL << 13) ? 13 : \
  133 + (n) & (1ULL << 12) ? 12 : \
  134 + (n) & (1ULL << 11) ? 11 : \
  135 + (n) & (1ULL << 10) ? 10 : \
  136 + (n) & (1ULL << 9) ? 9 : \
  137 + (n) & (1ULL << 8) ? 8 : \
  138 + (n) & (1ULL << 7) ? 7 : \
  139 + (n) & (1ULL << 6) ? 6 : \
  140 + (n) & (1ULL << 5) ? 5 : \
  141 + (n) & (1ULL << 4) ? 4 : \
  142 + (n) & (1ULL << 3) ? 3 : \
  143 + (n) & (1ULL << 2) ? 2 : \
  144 + (n) & (1ULL << 1) ? 1 : \
  145 + (n) & (1ULL << 0) ? 0 : \
  146 + ____ilog2_NaN() \
  147 + ) : \
  148 + (sizeof(n) <= 4) ? \
  149 + __ilog2_u32(n) : \
  150 + __ilog2_u64(n) \
  151 + )
  152 +
  153 +/**
  154 + * roundup_pow_of_two - round the given value up to nearest power of two
  155 + * @n: parameter
  156 + *
  157 + * round the given value up to the nearest power of two
  158 + * - the result is undefined when n == 0
  159 + * - this can be used to initialise global variables from constant data
  160 + */
  161 +#define roundup_pow_of_two(n) \
  162 +( \
  163 + __builtin_constant_p(n) ? ( \
  164 + (n == 1) ? 1 : \
  165 + (1UL << (ilog2((n) - 1) + 1)) \
  166 + ) : \
  167 + __roundup_pow_of_two(n) \
  168 + )
  169 +
  170 +/**
  171 + * rounddown_pow_of_two - round the given value down to nearest power of two
  172 + * @n: parameter
  173 + *
  174 + * round the given value down to the nearest power of two
  175 + * - the result is undefined when n == 0
  176 + * - this can be used to initialise global variables from constant data
  177 + */
  178 +#define rounddown_pow_of_two(n) \
  179 +( \
  180 + __builtin_constant_p(n) ? ( \
  181 + (1UL << ilog2(n))) : \
  182 + __rounddown_pow_of_two(n) \
  183 + )
  184 +
  185 +#endif /* _TOOLS_LINUX_LOG2_H */
tools/lib/api/fs/fs.c
... ... @@ -7,6 +7,10 @@
7 7 #include <stdlib.h>
8 8 #include <string.h>
9 9 #include <sys/vfs.h>
  10 +#include <sys/types.h>
  11 +#include <sys/stat.h>
  12 +#include <fcntl.h>
  13 +#include <unistd.h>
10 14  
11 15 #include "debugfs.h"
12 16 #include "fs.h"
... ... @@ -163,4 +167,34 @@
163 167  
164 168 FS__MOUNTPOINT(sysfs, FS__SYSFS);
165 169 FS__MOUNTPOINT(procfs, FS__PROCFS);
  170 +
  171 +int filename__read_int(const char *filename, int *value)
  172 +{
  173 + char line[64];
  174 + int fd = open(filename, O_RDONLY), err = -1;
  175 +
  176 + if (fd < 0)
  177 + return -1;
  178 +
  179 + if (read(fd, line, sizeof(line)) > 0) {
  180 + *value = atoi(line);
  181 + err = 0;
  182 + }
  183 +
  184 + close(fd);
  185 + return err;
  186 +}
  187 +
  188 +int sysctl__read_int(const char *sysctl, int *value)
  189 +{
  190 + char path[PATH_MAX];
  191 + const char *procfs = procfs__mountpoint();
  192 +
  193 + if (!procfs)
  194 + return -1;
  195 +
  196 + snprintf(path, sizeof(path), "%s/sys/%s", procfs, sysctl);
  197 +
  198 + return filename__read_int(path, value);
  199 +}
tools/lib/api/fs/fs.h
... ... @@ -11,5 +11,8 @@
11 11  
12 12 const char *sysfs__mountpoint(void);
13 13 const char *procfs__mountpoint(void);
  14 +
  15 +int filename__read_int(const char *filename, int *value);
  16 +int sysctl__read_int(const char *sysctl, int *value);
14 17 #endif /* __API_FS__ */
tools/lib/util/find_next_bit.c
  1 +/* find_next_bit.c: fallback find next bit implementation
  2 + *
  3 + * Copied from lib/find_next_bit.c to tools/lib/util/find_next_bit.c
  4 + *
  5 + * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
  6 + * Written by David Howells (dhowells@redhat.com)
  7 + *
  8 + * This program is free software; you can redistribute it and/or
  9 + * modify it under the terms of the GNU General Public License
  10 + * as published by the Free Software Foundation; either version
  11 + * 2 of the License, or (at your option) any later version.
  12 + */
  13 +
  14 +#include <linux/bitops.h>
  15 +#include <asm/types.h>
  16 +#include <asm/byteorder.h>
  17 +
  18 +#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG)
  19 +
  20 +#ifndef find_next_bit
  21 +/*
  22 + * Find the next set bit in a memory region.
  23 + */
  24 +unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
  25 + unsigned long offset)
  26 +{
  27 + const unsigned long *p = addr + BITOP_WORD(offset);
  28 + unsigned long result = offset & ~(BITS_PER_LONG-1);
  29 + unsigned long tmp;
  30 +
  31 + if (offset >= size)
  32 + return size;
  33 + size -= result;
  34 + offset %= BITS_PER_LONG;
  35 + if (offset) {
  36 + tmp = *(p++);
  37 + tmp &= (~0UL << offset);
  38 + if (size < BITS_PER_LONG)
  39 + goto found_first;
  40 + if (tmp)
  41 + goto found_middle;
  42 + size -= BITS_PER_LONG;
  43 + result += BITS_PER_LONG;
  44 + }
  45 + while (size & ~(BITS_PER_LONG-1)) {
  46 + if ((tmp = *(p++)))
  47 + goto found_middle;
  48 + result += BITS_PER_LONG;
  49 + size -= BITS_PER_LONG;
  50 + }
  51 + if (!size)
  52 + return result;
  53 + tmp = *p;
  54 +
  55 +found_first:
  56 + tmp &= (~0UL >> (BITS_PER_LONG - size));
  57 + if (tmp == 0UL) /* Are any bits set? */
  58 + return result + size; /* Nope. */
  59 +found_middle:
  60 + return result + __ffs(tmp);
  61 +}
  62 +#endif
  63 +
  64 +#ifndef find_first_bit
  65 +/*
  66 + * Find the first set bit in a memory region.
  67 + */
  68 +unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
  69 +{
  70 + const unsigned long *p = addr;
  71 + unsigned long result = 0;
  72 + unsigned long tmp;
  73 +
  74 + while (size & ~(BITS_PER_LONG-1)) {
  75 + if ((tmp = *(p++)))
  76 + goto found;
  77 + result += BITS_PER_LONG;
  78 + size -= BITS_PER_LONG;
  79 + }
  80 + if (!size)
  81 + return result;
  82 +
  83 + tmp = (*p) & (~0UL >> (BITS_PER_LONG - size));
  84 + if (tmp == 0UL) /* Are any bits set? */
  85 + return result + size; /* Nope. */
  86 +found:
  87 + return result + __ffs(tmp);
  88 +}
  89 +#endif
tools/perf/Documentation/perf.txt
... ... @@ -18,6 +18,10 @@
18 18 --debug verbose # sets verbose = 1
19 19 --debug verbose=2 # sets verbose = 2
20 20  
  21 +--buildid-dir::
  22 + Set up the buildid cache directory. It has higher priority than
  23 + the buildid.dir config file option.
  24 +
21 25 DESCRIPTION
22 26 -----------
23 27 Performance counters for Linux are a new kernel-based subsystem
... ... @@ -4,17 +4,31 @@
4 4 tools/lib/api
5 5 tools/lib/symbol/kallsyms.c
6 6 tools/lib/symbol/kallsyms.h
  7 +tools/lib/util/find_next_bit.c
7 8 tools/include/asm/bug.h
  9 +tools/include/asm-generic/bitops/atomic.h
  10 +tools/include/asm-generic/bitops/__ffs.h
  11 +tools/include/asm-generic/bitops/__fls.h
  12 +tools/include/asm-generic/bitops/find.h
  13 +tools/include/asm-generic/bitops/fls64.h
  14 +tools/include/asm-generic/bitops/fls.h
  15 +tools/include/asm-generic/bitops.h
  16 +tools/include/linux/bitops.h
8 17 tools/include/linux/compiler.h
9   -tools/include/linux/hash.h
10 18 tools/include/linux/export.h
  19 +tools/include/linux/hash.h
  20 +tools/include/linux/log2.h
11 21 tools/include/linux/types.h
  22 +include/asm-generic/bitops/fls64.h
  23 +include/asm-generic/bitops/__fls.h
  24 +include/asm-generic/bitops/fls.h
12 25 include/linux/const.h
13 26 include/linux/perf_event.h
14 27 include/linux/rbtree.h
15 28 include/linux/list.h
16 29 include/linux/hash.h
17 30 include/linux/stringify.h
  31 +lib/find_next_bit.c
18 32 lib/rbtree.c
19 33 include/linux/swab.h
20 34 arch/*/include/asm/unistd*.h
tools/perf/Makefile.perf
... ... @@ -231,8 +231,16 @@
231 231 LIB_H += ../include/linux/hash.h
232 232 LIB_H += ../../include/linux/stringify.h
233 233 LIB_H += util/include/linux/bitmap.h
234   -LIB_H += util/include/linux/bitops.h
  234 +LIB_H += ../include/linux/bitops.h
  235 +LIB_H += ../include/asm-generic/bitops/atomic.h
  236 +LIB_H += ../include/asm-generic/bitops/find.h
  237 +LIB_H += ../include/asm-generic/bitops/fls64.h
  238 +LIB_H += ../include/asm-generic/bitops/fls.h
  239 +LIB_H += ../include/asm-generic/bitops/__ffs.h
  240 +LIB_H += ../include/asm-generic/bitops/__fls.h
  241 +LIB_H += ../include/asm-generic/bitops.h
235 242 LIB_H += ../include/linux/compiler.h
  243 +LIB_H += ../include/linux/log2.h
236 244 LIB_H += util/include/linux/const.h
237 245 LIB_H += util/include/linux/ctype.h
238 246 LIB_H += util/include/linux/kernel.h
... ... @@ -335,6 +343,7 @@
335 343 LIB_OBJS += $(OUTPUT)util/evlist.o
336 344 LIB_OBJS += $(OUTPUT)util/evsel.o
337 345 LIB_OBJS += $(OUTPUT)util/exec_cmd.o
  346 +LIB_OBJS += $(OUTPUT)util/find_next_bit.o
338 347 LIB_OBJS += $(OUTPUT)util/help.o
339 348 LIB_OBJS += $(OUTPUT)util/kallsyms.o
340 349 LIB_OBJS += $(OUTPUT)util/levenshtein.o
... ... @@ -458,7 +467,6 @@
458 467 BUILTIN_OBJS += $(OUTPUT)bench/mem-memset-x86-64-asm.o
459 468 endif
460 469 BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o
461   -BUILTIN_OBJS += $(OUTPUT)bench/mem-memset.o
462 470 BUILTIN_OBJS += $(OUTPUT)bench/futex-hash.o
463 471 BUILTIN_OBJS += $(OUTPUT)bench/futex-wake.o
464 472 BUILTIN_OBJS += $(OUTPUT)bench/futex-requeue.o
... ... @@ -733,6 +741,9 @@
733 741 $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $<
734 742  
735 743 $(OUTPUT)util/rbtree.o: ../../lib/rbtree.c $(OUTPUT)PERF-CFLAGS
  744 + $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
  745 +
  746 +$(OUTPUT)util/find_next_bit.o: ../lib/util/find_next_bit.c $(OUTPUT)PERF-CFLAGS
736 747 $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
737 748  
738 749 $(OUTPUT)util/parse-events.o: util/parse-events.c $(OUTPUT)PERF-CFLAGS
tools/perf/bench/mem-memcpy.c
... ... @@ -13,6 +13,7 @@
13 13 #include "../util/cloexec.h"
14 14 #include "bench.h"
15 15 #include "mem-memcpy-arch.h"
  16 +#include "mem-memset-arch.h"
16 17  
17 18 #include <stdio.h>
18 19 #include <stdlib.h>
19 20  
20 21  
21 22  
... ... @@ -48,20 +49,24 @@
48 49 };
49 50  
50 51 typedef void *(*memcpy_t)(void *, const void *, size_t);
  52 +typedef void *(*memset_t)(void *, int, size_t);
51 53  
52 54 struct routine {
53 55 const char *name;
54 56 const char *desc;
55   - memcpy_t fn;
  57 + union {
  58 + memcpy_t memcpy;
  59 + memset_t memset;
  60 + } fn;
56 61 };
57 62  
58   -struct routine routines[] = {
59   - { "default",
60   - "Default memcpy() provided by glibc",
61   - memcpy },
  63 +struct routine memcpy_routines[] = {
  64 + { .name = "default",
  65 + .desc = "Default memcpy() provided by glibc",
  66 + .fn.memcpy = memcpy },
62 67 #ifdef HAVE_ARCH_X86_64_SUPPORT
63 68  
64   -#define MEMCPY_FN(fn, name, desc) { name, desc, fn },
  69 +#define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn},
65 70 #include "mem-memcpy-x86-64-asm-def.h"
66 71 #undef MEMCPY_FN
67 72  
... ... @@ -69,7 +74,7 @@
69 74  
70 75 { NULL,
71 76 NULL,
72   - NULL }
  77 + {NULL} }
73 78 };
74 79  
75 80 static const char * const bench_mem_memcpy_usage[] = {
... ... @@ -110,63 +115,6 @@
110 115 (double)ts->tv_usec / (double)1000000;
111 116 }
112 117  
113   -static void alloc_mem(void **dst, void **src, size_t length)
114   -{
115   - *dst = zalloc(length);
116   - if (!*dst)
117   - die("memory allocation failed - maybe length is too large?\n");
118   -
119   - *src = zalloc(length);
120   - if (!*src)
121   - die("memory allocation failed - maybe length is too large?\n");
122   - /* Make sure to always replace the zero pages even if MMAP_THRESH is crossed */
123   - memset(*src, 0, length);
124   -}
125   -
126   -static u64 do_memcpy_cycle(memcpy_t fn, size_t len, bool prefault)
127   -{
128   - u64 cycle_start = 0ULL, cycle_end = 0ULL;
129   - void *src = NULL, *dst = NULL;
130   - int i;
131   -
132   - alloc_mem(&src, &dst, len);
133   -
134   - if (prefault)
135   - fn(dst, src, len);
136   -
137   - cycle_start = get_cycle();
138   - for (i = 0; i < iterations; ++i)
139   - fn(dst, src, len);
140   - cycle_end = get_cycle();
141   -
142   - free(src);
143   - free(dst);
144   - return cycle_end - cycle_start;
145   -}
146   -
147   -static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault)
148   -{
149   - struct timeval tv_start, tv_end, tv_diff;
150   - void *src = NULL, *dst = NULL;
151   - int i;
152   -
153   - alloc_mem(&src, &dst, len);
154   -
155   - if (prefault)
156   - fn(dst, src, len);
157   -
158   - BUG_ON(gettimeofday(&tv_start, NULL));
159   - for (i = 0; i < iterations; ++i)
160   - fn(dst, src, len);
161   - BUG_ON(gettimeofday(&tv_end, NULL));
162   -
163   - timersub(&tv_end, &tv_start, &tv_diff);
164   -
165   - free(src);
166   - free(dst);
167   - return (double)((double)len / timeval2double(&tv_diff));
168   -}
169   -
170 118 #define pf (no_prefault ? 0 : 1)
171 119  
172 120 #define print_bps(x) do { \
173 121  
174 122  
... ... @@ -180,16 +128,25 @@
180 128 printf(" %14lf GB/Sec", x / K / K / K); \
181 129 } while (0)
182 130  
183   -int bench_mem_memcpy(int argc, const char **argv,
184   - const char *prefix __maybe_unused)
  131 +struct bench_mem_info {
  132 + const struct routine *routines;
  133 + u64 (*do_cycle)(const struct routine *r, size_t len, bool prefault);
  134 + double (*do_gettimeofday)(const struct routine *r, size_t len, bool prefault);
  135 + const char *const *usage;
  136 +};
  137 +
  138 +static int bench_mem_common(int argc, const char **argv,
  139 + const char *prefix __maybe_unused,
  140 + struct bench_mem_info *info)
185 141 {
186 142 int i;
187 143 size_t len;
  144 + double totallen;
188 145 double result_bps[2];
189 146 u64 result_cycle[2];
190 147  
191 148 argc = parse_options(argc, argv, options,
192   - bench_mem_memcpy_usage, 0);
  149 + info->usage, 0);
193 150  
194 151 if (no_prefault && only_prefault) {
195 152 fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n");
... ... @@ -200,6 +157,7 @@
200 157 init_cycle();
201 158  
202 159 len = (size_t)perf_atoll((char *)length_str);
  160 + totallen = (double)len * iterations;
203 161  
204 162 result_cycle[0] = result_cycle[1] = 0ULL;
205 163 result_bps[0] = result_bps[1] = 0.0;
206 164  
207 165  
208 166  
... ... @@ -213,16 +171,16 @@
213 171 if (only_prefault && no_prefault)
214 172 only_prefault = no_prefault = false;
215 173  
216   - for (i = 0; routines[i].name; i++) {
217   - if (!strcmp(routines[i].name, routine))
  174 + for (i = 0; info->routines[i].name; i++) {
  175 + if (!strcmp(info->routines[i].name, routine))
218 176 break;
219 177 }
220   - if (!routines[i].name) {
  178 + if (!info->routines[i].name) {
221 179 printf("Unknown routine:%s\n", routine);
222 180 printf("Available routines...\n");
223   - for (i = 0; routines[i].name; i++) {
  181 + for (i = 0; info->routines[i].name; i++) {
224 182 printf("\t%s ... %s\n",
225   - routines[i].name, routines[i].desc);
  183 + info->routines[i].name, info->routines[i].desc);
226 184 }
227 185 return 1;
228 186 }
229 187  
230 188  
231 189  
232 190  
233 191  
... ... @@ -234,25 +192,25 @@
234 192 /* show both of results */
235 193 if (use_cycle) {
236 194 result_cycle[0] =
237   - do_memcpy_cycle(routines[i].fn, len, false);
  195 + info->do_cycle(&info->routines[i], len, false);
238 196 result_cycle[1] =
239   - do_memcpy_cycle(routines[i].fn, len, true);
  197 + info->do_cycle(&info->routines[i], len, true);
240 198 } else {
241 199 result_bps[0] =
242   - do_memcpy_gettimeofday(routines[i].fn,
  200 + info->do_gettimeofday(&info->routines[i],
243 201 len, false);
244 202 result_bps[1] =
245   - do_memcpy_gettimeofday(routines[i].fn,
  203 + info->do_gettimeofday(&info->routines[i],
246 204 len, true);
247 205 }
248 206 } else {
249 207 if (use_cycle) {
250 208 result_cycle[pf] =
251   - do_memcpy_cycle(routines[i].fn,
  209 + info->do_cycle(&info->routines[i],
252 210 len, only_prefault);
253 211 } else {
254 212 result_bps[pf] =
255   - do_memcpy_gettimeofday(routines[i].fn,
  213 + info->do_gettimeofday(&info->routines[i],
256 214 len, only_prefault);
257 215 }
258 216 }
259 217  
... ... @@ -263,10 +221,10 @@
263 221 if (use_cycle) {
264 222 printf(" %14lf Cycle/Byte\n",
265 223 (double)result_cycle[0]
266   - / (double)len);
  224 + / totallen);
267 225 printf(" %14lf Cycle/Byte (with prefault)\n",
268 226 (double)result_cycle[1]
269   - / (double)len);
  227 + / totallen);
270 228 } else {
271 229 print_bps(result_bps[0]);
272 230 printf("\n");
... ... @@ -277,7 +235,7 @@
277 235 if (use_cycle) {
278 236 printf(" %14lf Cycle/Byte",
279 237 (double)result_cycle[pf]
280   - / (double)len);
  238 + / totallen);
281 239 } else
282 240 print_bps(result_bps[pf]);
283 241  
... ... @@ -288,8 +246,8 @@
288 246 if (!only_prefault && !no_prefault) {
289 247 if (use_cycle) {
290 248 printf("%lf %lf\n",
291   - (double)result_cycle[0] / (double)len,
292   - (double)result_cycle[1] / (double)len);
  249 + (double)result_cycle[0] / totallen,
  250 + (double)result_cycle[1] / totallen);
293 251 } else {
294 252 printf("%lf %lf\n",
295 253 result_bps[0], result_bps[1]);
... ... @@ -297,7 +255,7 @@
297 255 } else {
298 256 if (use_cycle) {
299 257 printf("%lf\n", (double)result_cycle[pf]
300   - / (double)len);
  258 + / totallen);
301 259 } else
302 260 printf("%lf\n", result_bps[pf]);
303 261 }
... ... @@ -309,5 +267,165 @@
309 267 }
310 268  
311 269 return 0;
  270 +}
  271 +
  272 +static void memcpy_alloc_mem(void **dst, void **src, size_t length)
  273 +{
  274 + *dst = zalloc(length);
  275 + if (!*dst)
  276 + die("memory allocation failed - maybe length is too large?\n");
  277 +
  278 + *src = zalloc(length);
  279 + if (!*src)
  280 + die("memory allocation failed - maybe length is too large?\n");
  281 + /* Make sure to always replace the zero pages even if MMAP_THRESH is crossed */
  282 + memset(*src, 0, length);
  283 +}
  284 +
  285 +static u64 do_memcpy_cycle(const struct routine *r, size_t len, bool prefault)
  286 +{
  287 + u64 cycle_start = 0ULL, cycle_end = 0ULL;
  288 + void *src = NULL, *dst = NULL;
  289 + memcpy_t fn = r->fn.memcpy;
  290 + int i;
  291 +
  292 + memcpy_alloc_mem(&src, &dst, len);
  293 +
  294 + if (prefault)
  295 + fn(dst, src, len);
  296 +
  297 + cycle_start = get_cycle();
  298 + for (i = 0; i < iterations; ++i)
  299 + fn(dst, src, len);
  300 + cycle_end = get_cycle();
  301 +
  302 + free(src);
  303 + free(dst);
  304 + return cycle_end - cycle_start;
  305 +}
  306 +
  307 +static double do_memcpy_gettimeofday(const struct routine *r, size_t len,
  308 + bool prefault)
  309 +{
  310 + struct timeval tv_start, tv_end, tv_diff;
  311 + memcpy_t fn = r->fn.memcpy;
  312 + void *src = NULL, *dst = NULL;
  313 + int i;
  314 +
  315 + memcpy_alloc_mem(&src, &dst, len);
  316 +
  317 + if (prefault)
  318 + fn(dst, src, len);
  319 +
  320 + BUG_ON(gettimeofday(&tv_start, NULL));
  321 + for (i = 0; i < iterations; ++i)
  322 + fn(dst, src, len);
  323 + BUG_ON(gettimeofday(&tv_end, NULL));
  324 +
  325 + timersub(&tv_end, &tv_start, &tv_diff);
  326 +
  327 + free(src);
  328 + free(dst);
  329 + return (double)(((double)len * iterations) / timeval2double(&tv_diff));
  330 +}
  331 +
  332 +int bench_mem_memcpy(int argc, const char **argv,
  333 + const char *prefix __maybe_unused)
  334 +{
  335 + struct bench_mem_info info = {
  336 + .routines = memcpy_routines,
  337 + .do_cycle = do_memcpy_cycle,
  338 + .do_gettimeofday = do_memcpy_gettimeofday,
  339 + .usage = bench_mem_memcpy_usage,
  340 + };
  341 +
  342 + return bench_mem_common(argc, argv, prefix, &info);
  343 +}
  344 +
/*
 * Allocate the buffer used by the memset benchmarks.
 *
 * @dst:    receives the zalloc()ed buffer of @length bytes
 * @length: size in bytes of the buffer
 *
 * die() is called with an explanatory message if the allocation returns NULL.
 */
static void memset_alloc_mem(void **dst, size_t length)
{
	void *buf = zalloc(length);

	*dst = buf;
	if (buf == NULL)
		die("memory allocation failed - maybe length is too large?\n");
}
  351 +
  352 +static u64 do_memset_cycle(const struct routine *r, size_t len, bool prefault)
  353 +{
  354 + u64 cycle_start = 0ULL, cycle_end = 0ULL;
  355 + memset_t fn = r->fn.memset;
  356 + void *dst = NULL;
  357 + int i;
  358 +
  359 + memset_alloc_mem(&dst, len);
  360 +
  361 + if (prefault)
  362 + fn(dst, -1, len);
  363 +
  364 + cycle_start = get_cycle();
  365 + for (i = 0; i < iterations; ++i)
  366 + fn(dst, i, len);
  367 + cycle_end = get_cycle();
  368 +
  369 + free(dst);
  370 + return cycle_end - cycle_start;
  371 +}
  372 +
  373 +static double do_memset_gettimeofday(const struct routine *r, size_t len,
  374 + bool prefault)
  375 +{
  376 + struct timeval tv_start, tv_end, tv_diff;
  377 + memset_t fn = r->fn.memset;
  378 + void *dst = NULL;
  379 + int i;
  380 +
  381 + memset_alloc_mem(&dst, len);
  382 +
  383 + if (prefault)
  384 + fn(dst, -1, len);
  385 +
  386 + BUG_ON(gettimeofday(&tv_start, NULL));
  387 + for (i = 0; i < iterations; ++i)
  388 + fn(dst, i, len);
  389 + BUG_ON(gettimeofday(&tv_end, NULL));
  390 +
  391 + timersub(&tv_end, &tv_start, &tv_diff);
  392 +
  393 + free(dst);
  394 + return (double)(((double)len * iterations) / timeval2double(&tv_diff));
  395 +}
  396 +
/* NULL-terminated usage strings handed to bench_mem_common() for memset. */
static const char * const bench_mem_memset_usage[] = {
	"perf bench mem memset <options>", NULL,
};
  401 +
  402 +static const struct routine memset_routines[] = {
  403 + { .name ="default",
  404 + .desc = "Default memset() provided by glibc",
  405 + .fn.memset = memset },
  406 +#ifdef HAVE_ARCH_X86_64_SUPPORT
  407 +
  408 +#define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn },
  409 +#include "mem-memset-x86-64-asm-def.h"
  410 +#undef MEMSET_FN
  411 +
  412 +#endif
  413 +
  414 + { .name = NULL,
  415 + .desc = NULL,
  416 + .fn.memset = NULL }
  417 +};
  418 +
  419 +int bench_mem_memset(int argc, const char **argv,
  420 + const char *prefix __maybe_unused)
  421 +{
  422 + struct bench_mem_info info = {
  423 + .routines = memset_routines,
  424 + .do_cycle = do_memset_cycle,
  425 + .do_gettimeofday = do_memset_gettimeofday,
  426 + .usage = bench_mem_memset_usage,
  427 + };
  428 +
  429 + return bench_mem_common(argc, argv, prefix, &info);
312 430 }
tools/perf/bench/mem-memset.c
1   -/*
2   - * mem-memset.c
3   - *
4   - * memset: Simple memory set in various ways
5   - *
6   - * Trivial clone of mem-memcpy.c.
7   - */
8   -
9   -#include "../perf.h"
10   -#include "../util/util.h"
11   -#include "../util/parse-options.h"
12   -#include "../util/header.h"
13   -#include "../util/cloexec.h"
14   -#include "bench.h"
15   -#include "mem-memset-arch.h"
16   -
17   -#include <stdio.h>
18   -#include <stdlib.h>
19   -#include <string.h>
20   -#include <sys/time.h>
21   -#include <errno.h>
22   -
23   -#define K 1024
24   -
25   -static const char *length_str = "1MB";
26   -static const char *routine = "default";
27   -static int iterations = 1;
28   -static bool use_cycle;
29   -static int cycle_fd;
30   -static bool only_prefault;
31   -static bool no_prefault;
32   -
33   -static const struct option options[] = {
34   - OPT_STRING('l', "length", &length_str, "1MB",
35   - "Specify length of memory to set. "
36   - "Available units: B, KB, MB, GB and TB (upper and lower)"),
37   - OPT_STRING('r', "routine", &routine, "default",
38   - "Specify routine to set"),
39   - OPT_INTEGER('i', "iterations", &iterations,
40   - "repeat memset() invocation this number of times"),
41   - OPT_BOOLEAN('c', "cycle", &use_cycle,
42   - "Use cycles event instead of gettimeofday() for measuring"),
43   - OPT_BOOLEAN('o', "only-prefault", &only_prefault,
44   - "Show only the result with page faults before memset()"),
45   - OPT_BOOLEAN('n', "no-prefault", &no_prefault,
46   - "Show only the result without page faults before memset()"),
47   - OPT_END()
48   -};
49   -
50   -typedef void *(*memset_t)(void *, int, size_t);
51   -
52   -struct routine {
53   - const char *name;
54   - const char *desc;
55   - memset_t fn;
56   -};
57   -
58   -static const struct routine routines[] = {
59   - { "default",
60   - "Default memset() provided by glibc",
61   - memset },
62   -#ifdef HAVE_ARCH_X86_64_SUPPORT
63   -
64   -#define MEMSET_FN(fn, name, desc) { name, desc, fn },
65   -#include "mem-memset-x86-64-asm-def.h"
66   -#undef MEMSET_FN
67   -
68   -#endif
69   -
70   - { NULL,
71   - NULL,
72   - NULL }
73   -};
74   -
75   -static const char * const bench_mem_memset_usage[] = {
76   - "perf bench mem memset <options>",
77   - NULL
78   -};
79   -
80   -static struct perf_event_attr cycle_attr = {
81   - .type = PERF_TYPE_HARDWARE,
82   - .config = PERF_COUNT_HW_CPU_CYCLES
83   -};
84   -
85   -static void init_cycle(void)
86   -{
87   - cycle_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1,
88   - perf_event_open_cloexec_flag());
89   -
90   - if (cycle_fd < 0 && errno == ENOSYS)
91   - die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
92   - else
93   - BUG_ON(cycle_fd < 0);
94   -}
95   -
96   -static u64 get_cycle(void)
97   -{
98   - int ret;
99   - u64 clk;
100   -
101   - ret = read(cycle_fd, &clk, sizeof(u64));
102   - BUG_ON(ret != sizeof(u64));
103   -
104   - return clk;
105   -}
106   -
107   -static double timeval2double(struct timeval *ts)
108   -{
109   - return (double)ts->tv_sec +
110   - (double)ts->tv_usec / (double)1000000;
111   -}
112   -
113   -static void alloc_mem(void **dst, size_t length)
114   -{
115   - *dst = zalloc(length);
116   - if (!*dst)
117   - die("memory allocation failed - maybe length is too large?\n");
118   -}
119   -
120   -static u64 do_memset_cycle(memset_t fn, size_t len, bool prefault)
121   -{
122   - u64 cycle_start = 0ULL, cycle_end = 0ULL;
123   - void *dst = NULL;
124   - int i;
125   -
126   - alloc_mem(&dst, len);
127   -
128   - if (prefault)
129   - fn(dst, -1, len);
130   -
131   - cycle_start = get_cycle();
132   - for (i = 0; i < iterations; ++i)
133   - fn(dst, i, len);
134   - cycle_end = get_cycle();
135   -
136   - free(dst);
137   - return cycle_end - cycle_start;
138   -}
139   -
140   -static double do_memset_gettimeofday(memset_t fn, size_t len, bool prefault)
141   -{
142   - struct timeval tv_start, tv_end, tv_diff;
143   - void *dst = NULL;
144   - int i;
145   -
146   - alloc_mem(&dst, len);
147   -
148   - if (prefault)
149   - fn(dst, -1, len);
150   -
151   - BUG_ON(gettimeofday(&tv_start, NULL));
152   - for (i = 0; i < iterations; ++i)
153   - fn(dst, i, len);
154   - BUG_ON(gettimeofday(&tv_end, NULL));
155   -
156   - timersub(&tv_end, &tv_start, &tv_diff);
157   -
158   - free(dst);
159   - return (double)((double)len / timeval2double(&tv_diff));
160   -}
161   -
162   -#define pf (no_prefault ? 0 : 1)
163   -
164   -#define print_bps(x) do { \
165   - if (x < K) \
166   - printf(" %14lf B/Sec", x); \
167   - else if (x < K * K) \
168   - printf(" %14lfd KB/Sec", x / K); \
169   - else if (x < K * K * K) \
170   - printf(" %14lf MB/Sec", x / K / K); \
171   - else \
172   - printf(" %14lf GB/Sec", x / K / K / K); \
173   - } while (0)
174   -
175   -int bench_mem_memset(int argc, const char **argv,
176   - const char *prefix __maybe_unused)
177   -{
178   - int i;
179   - size_t len;
180   - double result_bps[2];
181   - u64 result_cycle[2];
182   -
183   - argc = parse_options(argc, argv, options,
184   - bench_mem_memset_usage, 0);
185   -
186   - if (no_prefault && only_prefault) {
187   - fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n");
188   - return 1;
189   - }
190   -
191   - if (use_cycle)
192   - init_cycle();
193   -
194   - len = (size_t)perf_atoll((char *)length_str);
195   -
196   - result_cycle[0] = result_cycle[1] = 0ULL;
197   - result_bps[0] = result_bps[1] = 0.0;
198   -
199   - if ((s64)len <= 0) {
200   - fprintf(stderr, "Invalid length:%s\n", length_str);
201   - return 1;
202   - }
203   -
204   - /* same to without specifying either of prefault and no-prefault */
205   - if (only_prefault && no_prefault)
206   - only_prefault = no_prefault = false;
207   -
208   - for (i = 0; routines[i].name; i++) {
209   - if (!strcmp(routines[i].name, routine))
210   - break;
211   - }
212   - if (!routines[i].name) {
213   - printf("Unknown routine:%s\n", routine);
214   - printf("Available routines...\n");
215   - for (i = 0; routines[i].name; i++) {
216   - printf("\t%s ... %s\n",
217   - routines[i].name, routines[i].desc);
218   - }
219   - return 1;
220   - }
221   -
222   - if (bench_format == BENCH_FORMAT_DEFAULT)
223   - printf("# Copying %s Bytes ...\n\n", length_str);
224   -
225   - if (!only_prefault && !no_prefault) {
226   - /* show both of results */
227   - if (use_cycle) {
228   - result_cycle[0] =
229   - do_memset_cycle(routines[i].fn, len, false);
230   - result_cycle[1] =
231   - do_memset_cycle(routines[i].fn, len, true);
232   - } else {
233   - result_bps[0] =
234   - do_memset_gettimeofday(routines[i].fn,
235   - len, false);
236   - result_bps[1] =
237   - do_memset_gettimeofday(routines[i].fn,
238   - len, true);
239   - }
240   - } else {
241   - if (use_cycle) {
242   - result_cycle[pf] =
243   - do_memset_cycle(routines[i].fn,
244   - len, only_prefault);
245   - } else {
246   - result_bps[pf] =
247   - do_memset_gettimeofday(routines[i].fn,
248   - len, only_prefault);
249   - }
250   - }
251   -
252   - switch (bench_format) {
253   - case BENCH_FORMAT_DEFAULT:
254   - if (!only_prefault && !no_prefault) {
255   - if (use_cycle) {
256   - printf(" %14lf Cycle/Byte\n",
257   - (double)result_cycle[0]
258   - / (double)len);
259   - printf(" %14lf Cycle/Byte (with prefault)\n ",
260   - (double)result_cycle[1]
261   - / (double)len);
262   - } else {
263   - print_bps(result_bps[0]);
264   - printf("\n");
265   - print_bps(result_bps[1]);
266   - printf(" (with prefault)\n");
267   - }
268   - } else {
269   - if (use_cycle) {
270   - printf(" %14lf Cycle/Byte",
271   - (double)result_cycle[pf]
272   - / (double)len);
273   - } else
274   - print_bps(result_bps[pf]);
275   -
276   - printf("%s\n", only_prefault ? " (with prefault)" : "");
277   - }
278   - break;
279   - case BENCH_FORMAT_SIMPLE:
280   - if (!only_prefault && !no_prefault) {
281   - if (use_cycle) {
282   - printf("%lf %lf\n",
283   - (double)result_cycle[0] / (double)len,
284   - (double)result_cycle[1] / (double)len);
285   - } else {
286   - printf("%lf %lf\n",
287   - result_bps[0], result_bps[1]);
288   - }
289   - } else {
290   - if (use_cycle) {
291   - printf("%lf\n", (double)result_cycle[pf]
292   - / (double)len);
293   - } else
294   - printf("%lf\n", result_bps[pf]);
295   - }
296   - break;
297   - default:
298   - /* reaching this means there's some disaster: */
299   - die("unknown format: %d\n", bench_format);
300   - break;
301   - }
302   -
303   - return 0;
304   -}
tools/perf/builtin-buildid-cache.c
... ... @@ -285,12 +285,11 @@
285 285 struct str_node *pos;
286 286 int ret = 0;
287 287 bool force = false;
288   - char debugdir[PATH_MAX];
289 288 char const *add_name_list_str = NULL,
290 289 *remove_name_list_str = NULL,
291 290 *missing_filename = NULL,
292 291 *update_name_list_str = NULL,
293   - *kcore_filename;
  292 + *kcore_filename = NULL;
294 293 char sbuf[STRERR_BUFSIZE];
295 294  
296 295 struct perf_data_file file = {
297 296  
... ... @@ -335,13 +334,11 @@
335 334  
336 335 setup_pager();
337 336  
338   - snprintf(debugdir, sizeof(debugdir), "%s", buildid_dir);
339   -
340 337 if (add_name_list_str) {
341 338 list = strlist__new(true, add_name_list_str);
342 339 if (list) {
343 340 strlist__for_each(pos, list)
344   - if (build_id_cache__add_file(pos->s, debugdir)) {
  341 + if (build_id_cache__add_file(pos->s, buildid_dir)) {
345 342 if (errno == EEXIST) {
346 343 pr_debug("%s already in the cache\n",
347 344 pos->s);
... ... @@ -359,7 +356,7 @@
359 356 list = strlist__new(true, remove_name_list_str);
360 357 if (list) {
361 358 strlist__for_each(pos, list)
362   - if (build_id_cache__remove_file(pos->s, debugdir)) {
  359 + if (build_id_cache__remove_file(pos->s, buildid_dir)) {
363 360 if (errno == ENOENT) {
364 361 pr_debug("%s wasn't in the cache\n",
365 362 pos->s);
... ... @@ -380,7 +377,7 @@
380 377 list = strlist__new(true, update_name_list_str);
381 378 if (list) {
382 379 strlist__for_each(pos, list)
383   - if (build_id_cache__update_file(pos->s, debugdir)) {
  380 + if (build_id_cache__update_file(pos->s, buildid_dir)) {
384 381 if (errno == ENOENT) {
385 382 pr_debug("%s wasn't in the cache\n",
386 383 pos->s);
... ... @@ -395,7 +392,7 @@
395 392 }
396 393  
397 394 if (kcore_filename &&
398   - build_id_cache__add_kcore(kcore_filename, debugdir, force))
  395 + build_id_cache__add_kcore(kcore_filename, buildid_dir, force))
399 396 pr_warning("Couldn't add %s\n", kcore_filename);
400 397  
401 398 out:
tools/perf/builtin-kvm.c
... ... @@ -1293,7 +1293,8 @@
1293 1293 OPT_UINTEGER('d', "display", &kvm->display_time,
1294 1294 "time in seconds between display updates"),
1295 1295 OPT_STRING(0, "event", &kvm->report_event, "report event",
1296   - "event for reporting: vmexit, mmio, ioport"),
  1296 + "event for reporting: "
  1297 + "vmexit, mmio (x86 only), ioport (x86 only)"),
1297 1298 OPT_INTEGER(0, "vcpu", &kvm->trace_vcpu,
1298 1299 "vcpu id to report"),
1299 1300 OPT_STRING('k', "key", &kvm->sort_key, "sort-key",
tools/perf/builtin-trace.c
... ... @@ -2045,7 +2045,6 @@
2045 2045 unsigned long before;
2046 2046 const bool forks = argc > 0;
2047 2047 bool draining = false;
2048   - char sbuf[STRERR_BUFSIZE];
2049 2048  
2050 2049 trace->live = true;
2051 2050  
... ... @@ -2106,11 +2105,8 @@
2106 2105 goto out_error_open;
2107 2106  
2108 2107 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2109   - if (err < 0) {
2110   - fprintf(trace->output, "Couldn't mmap the events: %s\n",
2111   - strerror_r(errno, sbuf, sizeof(sbuf)));
2112   - goto out_delete_evlist;
2113   - }
  2108 + if (err < 0)
  2109 + goto out_error_mmap;
2114 2110  
2115 2111 perf_evlist__enable(evlist);
2116 2112  
... ... @@ -2210,6 +2206,10 @@
2210 2206 perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf));
2211 2207 goto out_error;
2212 2208  
  2209 +out_error_mmap:
  2210 + perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
  2211 + goto out_error;
  2212 +
2213 2213 out_error_open:
2214 2214 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2215 2215  
... ... @@ -2485,7 +2485,7 @@
2485 2485 .user_freq = UINT_MAX,
2486 2486 .user_interval = ULLONG_MAX,
2487 2487 .no_buffering = true,
2488   - .mmap_pages = 1024,
  2488 + .mmap_pages = UINT_MAX,
2489 2489 },
2490 2490 .output = stdout,
2491 2491 .show_comm = true,
... ... @@ -200,6 +200,16 @@
200 200 *envchanged = 1;
201 201 (*argv)++;
202 202 (*argc)--;
  203 + } else if (!strcmp(cmd, "--buildid-dir")) {
  204 + if (*argc < 2) {
  205 + fprintf(stderr, "No directory given for --buildid-dir.\n");
  206 + usage(perf_usage_string);
  207 + }
  208 + set_buildid_dir((*argv)[1]);
  209 + if (envchanged)
  210 + *envchanged = 1;
  211 + (*argv)++;
  212 + (*argc)--;
203 213 } else if (!prefixcmp(cmd, CMD_DEBUGFS_DIR)) {
204 214 perf_debugfs_set_path(cmd + strlen(CMD_DEBUGFS_DIR));
205 215 fprintf(stderr, "dir: %s\n", debugfs_mountpoint);
... ... @@ -499,7 +509,7 @@
499 509 }
500 510 if (!prefixcmp(cmd, "trace")) {
501 511 #ifdef HAVE_LIBAUDIT_SUPPORT
502   - set_buildid_dir();
  512 + set_buildid_dir(NULL);
503 513 setup_path();
504 514 argv[0] = "trace";
505 515 return cmd_trace(argc, argv, NULL);
... ... @@ -514,7 +524,7 @@
514 524 argc--;
515 525 handle_options(&argv, &argc, NULL);
516 526 commit_pager_choice();
517   - set_buildid_dir();
  527 + set_buildid_dir(NULL);
518 528  
519 529 if (argc > 0) {
520 530 if (!prefixcmp(argv[0], "--"))
tools/perf/tests/attr/base-record
... ... @@ -5,7 +5,7 @@
5 5 flags=0|8
6 6 cpu=*
7 7 type=0|1
8   -size=96
  8 +size=104
9 9 config=0
10 10 sample_period=4000
11 11 sample_type=263
tools/perf/tests/attr/base-stat
... ... @@ -5,7 +5,7 @@
5 5 flags=0|8
6 6 cpu=*
7 7 type=0
8   -size=96
  8 +size=104
9 9 config=0
10 10 sample_period=0
11 11 sample_type=0
tools/perf/ui/browsers/hists.c
... ... @@ -1252,7 +1252,7 @@
1252 1252  
1253 1253 nr_samples = convert_unit(nr_samples, &unit);
1254 1254 printed = scnprintf(bf, size,
1255   - "Samples: %lu%c of event '%s', Event count (approx.): %lu",
  1255 + "Samples: %lu%c of event '%s', Event count (approx.): %" PRIu64,
1256 1256 nr_samples, unit, ev_name, nr_events);
1257 1257  
1258 1258  
tools/perf/ui/hist.c
... ... @@ -162,8 +162,8 @@
162 162 return ret;
163 163  
164 164 nr_members = evsel->nr_members;
165   - fields_a = calloc(sizeof(*fields_a), nr_members);
166   - fields_b = calloc(sizeof(*fields_b), nr_members);
  165 + fields_a = calloc(nr_members, sizeof(*fields_a));
  166 + fields_b = calloc(nr_members, sizeof(*fields_b));
167 167  
168 168 if (!fields_a || !fields_b)
169 169 goto out;
tools/perf/util/build-id.c
... ... @@ -410,21 +410,18 @@
410 410 {
411 411 struct rb_node *nd;
412 412 int ret;
413   - char debugdir[PATH_MAX];
414 413  
415 414 if (no_buildid_cache)
416 415 return 0;
417 416  
418   - snprintf(debugdir, sizeof(debugdir), "%s", buildid_dir);
419   -
420   - if (mkdir(debugdir, 0755) != 0 && errno != EEXIST)
  417 + if (mkdir(buildid_dir, 0755) != 0 && errno != EEXIST)
421 418 return -1;
422 419  
423   - ret = machine__cache_build_ids(&session->machines.host, debugdir);
  420 + ret = machine__cache_build_ids(&session->machines.host, buildid_dir);
424 421  
425 422 for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) {
426 423 struct machine *pos = rb_entry(nd, struct machine, rb_node);
427   - ret |= machine__cache_build_ids(pos, debugdir);
  424 + ret |= machine__cache_build_ids(pos, buildid_dir);
428 425 }
429 426 return ret ? -1 : 0;
430 427 }
tools/perf/util/callchain.c
... ... @@ -77,7 +77,7 @@
77 77 ret = 0;
78 78 } else
79 79 pr_err("callchain: No more arguments "
80   - "needed for -g fp\n");
  80 + "needed for --call-graph fp\n");
81 81 break;
82 82  
83 83 #ifdef HAVE_DWARF_UNWIND_SUPPORT
tools/perf/util/config.c
... ... @@ -522,7 +522,7 @@
522 522 const char *v;
523 523  
524 524 /* same dir for all commands */
525   - if (!prefixcmp(var, "buildid.") && !strcmp(var + 8, "dir")) {
  525 + if (!strcmp(var, "buildid.dir")) {
526 526 v = perf_config_dirname(var, value);
527 527 if (!v)
528 528 return -1;
529 529  
530 530  
... ... @@ -539,12 +539,14 @@
539 539 perf_config(buildid_dir_command_config, &c);
540 540 }
541 541  
542   -void set_buildid_dir(void)
  542 +void set_buildid_dir(const char *dir)
543 543 {
544   - buildid_dir[0] = '\0';
  544 + if (dir)
  545 + scnprintf(buildid_dir, MAXPATHLEN-1, "%s", dir);
545 546  
546 547 /* try config file */
547   - check_buildid_dir_config();
  548 + if (buildid_dir[0] == '\0')
  549 + check_buildid_dir_config();
548 550  
549 551 /* default to $HOME/.debug */
550 552 if (buildid_dir[0] == '\0') {
tools/perf/util/evlist.c
... ... @@ -8,6 +8,7 @@
8 8 */
9 9 #include "util.h"
10 10 #include <api/fs/debugfs.h>
  11 +#include <api/fs/fs.h>
11 12 #include <poll.h>
12 13 #include "cpumap.h"
13 14 #include "thread_map.h"
... ... @@ -24,6 +25,7 @@
24 25  
25 26 #include <linux/bitops.h>
26 27 #include <linux/hash.h>
  28 +#include <linux/log2.h>
27 29  
28 30 static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx);
29 31 static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx);
... ... @@ -892,10 +894,24 @@
892 894  
893 895 static size_t perf_evlist__mmap_size(unsigned long pages)
894 896 {
895   - /* 512 kiB: default amount of unprivileged mlocked memory */
896   - if (pages == UINT_MAX)
897   - pages = (512 * 1024) / page_size;
898   - else if (!is_power_of_2(pages))
  897 + if (pages == UINT_MAX) {
  898 + int max;
  899 +
  900 + if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
  901 + /*
  902 + * Pick a once upon a time good value, i.e. things look
  903 + * strange since we can't read a sysctl value, but lets not
  904 + * die yet...
  905 + */
  906 + max = 512;
  907 + } else {
  908 + max -= (page_size / 1024);
  909 + }
  910 +
  911 + pages = (max * 1024) / page_size;
  912 + if (!is_power_of_2(pages))
  913 + pages = rounddown_pow_of_two(pages);
  914 + } else if (!is_power_of_2(pages))
899 915 return 0;
900 916  
901 917 return (pages + 1) * page_size;
... ... @@ -932,7 +948,7 @@
932 948 /* leave number of pages at 0 */
933 949 } else if (!is_power_of_2(pages)) {
934 950 /* round pages up to next power of 2 */
935   - pages = next_pow2_l(pages);
  951 + pages = roundup_pow_of_two(pages);
936 952 if (!pages)
937 953 return -EINVAL;
938 954 pr_info("rounding mmap pages size to %lu bytes (%lu pages)\n",
... ... @@ -1474,6 +1490,37 @@
1474 1490 printed += scnprintf(buf + printed, size - printed,
1475 1491 "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n"
1476 1492 "Hint:\tThe current value is %d.", value);
  1493 + break;
  1494 + default:
  1495 + scnprintf(buf, size, "%s", emsg);
  1496 + break;
  1497 + }
  1498 +
  1499 + return 0;
  1500 +}
  1501 +
  1502 +int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size)
  1503 +{
  1504 + char sbuf[STRERR_BUFSIZE], *emsg = strerror_r(err, sbuf, sizeof(sbuf));
  1505 + int pages_attempted = evlist->mmap_len / 1024, pages_max_per_user, printed = 0;
  1506 +
  1507 + switch (err) {
  1508 + case EPERM:
  1509 + sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user);
  1510 + printed += scnprintf(buf + printed, size - printed,
  1511 + "Error:\t%s.\n"
  1512 + "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n"
  1513 + "Hint:\tTried using %zd kB.\n",
  1514 + emsg, pages_max_per_user, pages_attempted);
  1515 +
  1516 + if (pages_attempted >= pages_max_per_user) {
  1517 + printed += scnprintf(buf + printed, size - printed,
  1518 + "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n",
  1519 + pages_max_per_user + pages_attempted);
  1520 + }
  1521 +
  1522 + printed += scnprintf(buf + printed, size - printed,
  1523 + "Hint:\tTry using a smaller -m/--mmap-pages value.");
1477 1524 break;
1478 1525 default:
1479 1526 scnprintf(buf, size, "%s", emsg);
tools/perf/util/evlist.h
... ... @@ -185,6 +185,7 @@
185 185  
186 186 int perf_evlist__strerror_tp(struct perf_evlist *evlist, int err, char *buf, size_t size);
187 187 int perf_evlist__strerror_open(struct perf_evlist *evlist, int err, char *buf, size_t size);
  188 +int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size);
188 189  
189 190 static inline unsigned int perf_mmap__read_head(struct perf_mmap *mm)
190 191 {
tools/perf/util/include/linux/bitops.h
1   -#ifndef _PERF_LINUX_BITOPS_H_
2   -#define _PERF_LINUX_BITOPS_H_
3   -
4   -#include <linux/kernel.h>
5   -#include <linux/compiler.h>
6   -#include <asm/hweight.h>
7   -
8   -#ifndef __WORDSIZE
9   -#define __WORDSIZE (__SIZEOF_LONG__ * 8)
10   -#endif
11   -
12   -#define BITS_PER_LONG __WORDSIZE
13   -#define BITS_PER_BYTE 8
14   -#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))
15   -#define BITS_TO_U64(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u64))
16   -#define BITS_TO_U32(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u32))
17   -#define BITS_TO_BYTES(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE)
18   -#define BIT_WORD(nr) ((nr) / BITS_PER_LONG)
19   -#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG))
20   -
21   -#define for_each_set_bit(bit, addr, size) \
22   - for ((bit) = find_first_bit((addr), (size)); \
23   - (bit) < (size); \
24   - (bit) = find_next_bit((addr), (size), (bit) + 1))
25   -
26   -/* same as for_each_set_bit() but use bit as value to start with */
27   -#define for_each_set_bit_from(bit, addr, size) \
28   - for ((bit) = find_next_bit((addr), (size), (bit)); \
29   - (bit) < (size); \
30   - (bit) = find_next_bit((addr), (size), (bit) + 1))
31   -
32   -static inline void set_bit(int nr, unsigned long *addr)
33   -{
34   - addr[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG);
35   -}
36   -
37   -static inline void clear_bit(int nr, unsigned long *addr)
38   -{
39   - addr[nr / BITS_PER_LONG] &= ~(1UL << (nr % BITS_PER_LONG));
40   -}
41   -
42   -static __always_inline int test_bit(unsigned int nr, const unsigned long *addr)
43   -{
44   - return ((1UL << (nr % BITS_PER_LONG)) &
45   - (((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0;
46   -}
47   -
48   -static inline unsigned long hweight_long(unsigned long w)
49   -{
50   - return sizeof(w) == 4 ? hweight32(w) : hweight64(w);
51   -}
52   -
53   -#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG)
54   -
55   -/**
56   - * __ffs - find first bit in word.
57   - * @word: The word to search
58   - *
59   - * Undefined if no bit exists, so code should check against 0 first.
60   - */
61   -static __always_inline unsigned long __ffs(unsigned long word)
62   -{
63   - int num = 0;
64   -
65   -#if BITS_PER_LONG == 64
66   - if ((word & 0xffffffff) == 0) {
67   - num += 32;
68   - word >>= 32;
69   - }
70   -#endif
71   - if ((word & 0xffff) == 0) {
72   - num += 16;
73   - word >>= 16;
74   - }
75   - if ((word & 0xff) == 0) {
76   - num += 8;
77   - word >>= 8;
78   - }
79   - if ((word & 0xf) == 0) {
80   - num += 4;
81   - word >>= 4;
82   - }
83   - if ((word & 0x3) == 0) {
84   - num += 2;
85   - word >>= 2;
86   - }
87   - if ((word & 0x1) == 0)
88   - num += 1;
89   - return num;
90   -}
91   -
92   -typedef const unsigned long __attribute__((__may_alias__)) long_alias_t;
93   -
94   -/*
95   - * Find the first set bit in a memory region.
96   - */
97   -static inline unsigned long
98   -find_first_bit(const unsigned long *addr, unsigned long size)
99   -{
100   - long_alias_t *p = (long_alias_t *) addr;
101   - unsigned long result = 0;
102   - unsigned long tmp;
103   -
104   - while (size & ~(BITS_PER_LONG-1)) {
105   - if ((tmp = *(p++)))
106   - goto found;
107   - result += BITS_PER_LONG;
108   - size -= BITS_PER_LONG;
109   - }
110   - if (!size)
111   - return result;
112   -
113   - tmp = (*p) & (~0UL >> (BITS_PER_LONG - size));
114   - if (tmp == 0UL) /* Are any bits set? */
115   - return result + size; /* Nope. */
116   -found:
117   - return result + __ffs(tmp);
118   -}
119   -
120   -/*
121   - * Find the next set bit in a memory region.
122   - */
123   -static inline unsigned long
124   -find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset)
125   -{
126   - const unsigned long *p = addr + BITOP_WORD(offset);
127   - unsigned long result = offset & ~(BITS_PER_LONG-1);
128   - unsigned long tmp;
129   -
130   - if (offset >= size)
131   - return size;
132   - size -= result;
133   - offset %= BITS_PER_LONG;
134   - if (offset) {
135   - tmp = *(p++);
136   - tmp &= (~0UL << offset);
137   - if (size < BITS_PER_LONG)
138   - goto found_first;
139   - if (tmp)
140   - goto found_middle;
141   - size -= BITS_PER_LONG;
142   - result += BITS_PER_LONG;
143   - }
144   - while (size & ~(BITS_PER_LONG-1)) {
145   - if ((tmp = *(p++)))
146   - goto found_middle;
147   - result += BITS_PER_LONG;
148   - size -= BITS_PER_LONG;
149   - }
150   - if (!size)
151   - return result;
152   - tmp = *p;
153   -
154   -found_first:
155   - tmp &= (~0UL >> (BITS_PER_LONG - size));
156   - if (tmp == 0UL) /* Are any bits set? */
157   - return result + size; /* Nope. */
158   -found_middle:
159   - return result + __ffs(tmp);
160   -}
161   -
162   -#endif
tools/perf/util/machine.c
... ... @@ -1385,19 +1385,46 @@
1385 1385 static int add_callchain_ip(struct thread *thread,
1386 1386 struct symbol **parent,
1387 1387 struct addr_location *root_al,
1388   - int cpumode,
  1388 + bool branch_history,
1389 1389 u64 ip)
1390 1390 {
1391 1391 struct addr_location al;
1392 1392  
1393 1393 al.filtered = 0;
1394 1394 al.sym = NULL;
1395   - if (cpumode == -1)
  1395 + if (branch_history)
1396 1396 thread__find_cpumode_addr_location(thread, MAP__FUNCTION,
1397 1397 ip, &al);
1398   - else
  1398 + else {
  1399 + u8 cpumode = PERF_RECORD_MISC_USER;
  1400 +
  1401 + if (ip >= PERF_CONTEXT_MAX) {
  1402 + switch (ip) {
  1403 + case PERF_CONTEXT_HV:
  1404 + cpumode = PERF_RECORD_MISC_HYPERVISOR;
  1405 + break;
  1406 + case PERF_CONTEXT_KERNEL:
  1407 + cpumode = PERF_RECORD_MISC_KERNEL;
  1408 + break;
  1409 + case PERF_CONTEXT_USER:
  1410 + cpumode = PERF_RECORD_MISC_USER;
  1411 + break;
  1412 + default:
  1413 + pr_debug("invalid callchain context: "
  1414 + "%"PRId64"\n", (s64) ip);
  1415 + /*
  1416 + * It seems the callchain is corrupted.
  1417 + * Discard all.
  1418 + */
  1419 + callchain_cursor_reset(&callchain_cursor);
  1420 + return 1;
  1421 + }
  1422 + return 0;
  1423 + }
1399 1424 thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
1400 1425 ip, &al);
  1426 + }
  1427 +
1401 1428 if (al.sym != NULL) {
1402 1429 if (sort__has_parent && !*parent &&
1403 1430 symbol__match_regex(al.sym, &parent_regex))
1404 1431  
... ... @@ -1480,11 +1507,8 @@
1480 1507 struct addr_location *root_al,
1481 1508 int max_stack)
1482 1509 {
1483   - u8 cpumode = PERF_RECORD_MISC_USER;
1484 1510 int chain_nr = min(max_stack, (int)chain->nr);
1485   - int i;
1486   - int j;
1487   - int err;
  1511 + int i, j, err;
1488 1512 int skip_idx = -1;
1489 1513 int first_call = 0;
1490 1514  
1491 1515  
... ... @@ -1542,10 +1566,10 @@
1542 1566  
1543 1567 for (i = 0; i < nr; i++) {
1544 1568 err = add_callchain_ip(thread, parent, root_al,
1545   - -1, be[i].to);
  1569 + true, be[i].to);
1546 1570 if (!err)
1547 1571 err = add_callchain_ip(thread, parent, root_al,
1548   - -1, be[i].from);
  1572 + true, be[i].from);
1549 1573 if (err == -EINVAL)
1550 1574 break;
1551 1575 if (err)
1552 1576  
1553 1577  
... ... @@ -1574,36 +1598,10 @@
1574 1598 #endif
1575 1599 ip = chain->ips[j];
1576 1600  
1577   - if (ip >= PERF_CONTEXT_MAX) {
1578   - switch (ip) {
1579   - case PERF_CONTEXT_HV:
1580   - cpumode = PERF_RECORD_MISC_HYPERVISOR;
1581   - break;
1582   - case PERF_CONTEXT_KERNEL:
1583   - cpumode = PERF_RECORD_MISC_KERNEL;
1584   - break;
1585   - case PERF_CONTEXT_USER:
1586   - cpumode = PERF_RECORD_MISC_USER;
1587   - break;
1588   - default:
1589   - pr_debug("invalid callchain context: "
1590   - "%"PRId64"\n", (s64) ip);
1591   - /*
1592   - * It seems the callchain is corrupted.
1593   - * Discard all.
1594   - */
1595   - callchain_cursor_reset(&callchain_cursor);
1596   - return 0;
1597   - }
1598   - continue;
1599   - }
  1601 + err = add_callchain_ip(thread, parent, root_al, false, ip);
1600 1602  
1601   - err = add_callchain_ip(thread, parent, root_al,
1602   - cpumode, ip);
1603   - if (err == -EINVAL)
1604   - break;
1605 1603 if (err)
1606   - return err;
  1604 + return (err < 0) ? err : 0;
1607 1605 }
1608 1606  
1609 1607 return 0;
tools/perf/util/record.c
... ... @@ -137,16 +137,7 @@
137 137  
138 138 static int get_max_rate(unsigned int *rate)
139 139 {
140   - char path[PATH_MAX];
141   - const char *procfs = procfs__mountpoint();
142   -
143   - if (!procfs)
144   - return -1;
145   -
146   - snprintf(path, PATH_MAX,
147   - "%s/sys/kernel/perf_event_max_sample_rate", procfs);
148   -
149   - return filename__read_int(path, (int *) rate);
  140 + return sysctl__read_int("kernel/perf_event_max_sample_rate", (int *)rate);
150 141 }
151 142  
152 143 static int record_opts__config_freq(struct record_opts *opts)
tools/perf/util/srcline.c
... ... @@ -20,7 +20,7 @@
20 20  
21 21 struct a2l_data {
22 22 const char *input;
23   - unsigned long addr;
  23 + u64 addr;
24 24  
25 25 bool found;
26 26 const char *filename;
... ... @@ -147,7 +147,7 @@
147 147 free(a2l);
148 148 }
149 149  
150   -static int addr2line(const char *dso_name, unsigned long addr,
  150 +static int addr2line(const char *dso_name, u64 addr,
151 151 char **file, unsigned int *line, struct dso *dso)
152 152 {
153 153 int ret = 0;
... ... @@ -193,7 +193,7 @@
193 193  
194 194 #else /* HAVE_LIBBFD_SUPPORT */
195 195  
196   -static int addr2line(const char *dso_name, unsigned long addr,
  196 +static int addr2line(const char *dso_name, u64 addr,
197 197 char **file, unsigned int *line_nr,
198 198 struct dso *dso __maybe_unused)
199 199 {
... ... @@ -252,7 +252,7 @@
252 252 */
253 253 #define A2L_FAIL_LIMIT 123
254 254  
255   -char *get_srcline(struct dso *dso, unsigned long addr, struct symbol *sym,
  255 +char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
256 256 bool show_sym)
257 257 {
258 258 char *file = NULL;
259 259  
... ... @@ -293,10 +293,10 @@
293 293 dso__free_a2l(dso);
294 294 }
295 295 if (sym) {
296   - if (asprintf(&srcline, "%s+%ld", show_sym ? sym->name : "",
  296 + if (asprintf(&srcline, "%s+%" PRIu64, show_sym ? sym->name : "",
297 297 addr - sym->start) < 0)
298 298 return SRCLINE_UNKNOWN;
299   - } else if (asprintf(&srcline, "%s[%lx]", dso->short_name, addr) < 0)
  299 + } else if (asprintf(&srcline, "%s[%" PRIx64 "]", dso->short_name, addr) < 0)
300 300 return SRCLINE_UNKNOWN;
301 301 return srcline;
302 302 }
tools/perf/util/symbol-minimal.c
... ... @@ -129,6 +129,7 @@
129 129  
130 130 for (i = 0, phdr = buf; i < ehdr.e_phnum; i++, phdr++) {
131 131 void *tmp;
  132 + long offset;
132 133  
133 134 if (need_swap) {
134 135 phdr->p_type = bswap_32(phdr->p_type);
135 136  
... ... @@ -140,12 +141,13 @@
140 141 continue;
141 142  
142 143 buf_size = phdr->p_filesz;
  144 + offset = phdr->p_offset;
143 145 tmp = realloc(buf, buf_size);
144 146 if (tmp == NULL)
145 147 goto out_free;
146 148  
147 149 buf = tmp;
148   - fseek(fp, phdr->p_offset, SEEK_SET);
  150 + fseek(fp, offset, SEEK_SET);
149 151 if (fread(buf, buf_size, 1, fp) != 1)
150 152 goto out_free;
151 153  
... ... @@ -178,6 +180,7 @@
178 180  
179 181 for (i = 0, phdr = buf; i < ehdr.e_phnum; i++, phdr++) {
180 182 void *tmp;
  183 + long offset;
181 184  
182 185 if (need_swap) {
183 186 phdr->p_type = bswap_32(phdr->p_type);
184 187  
... ... @@ -189,12 +192,13 @@
189 192 continue;
190 193  
191 194 buf_size = phdr->p_filesz;
  195 + offset = phdr->p_offset;
192 196 tmp = realloc(buf, buf_size);
193 197 if (tmp == NULL)
194 198 goto out_free;
195 199  
196 200 buf = tmp;
197   - fseek(fp, phdr->p_offset, SEEK_SET);
  201 + fseek(fp, offset, SEEK_SET);
198 202 if (fread(buf, buf_size, 1, fp) != 1)
199 203 goto out_free;
200 204  
tools/perf/util/util.c
... ... @@ -442,23 +442,6 @@
442 442 return (unsigned long) -1;
443 443 }
444 444  
445   -int filename__read_int(const char *filename, int *value)
446   -{
447   - char line[64];
448   - int fd = open(filename, O_RDONLY), err = -1;
449   -
450   - if (fd < 0)
451   - return -1;
452   -
453   - if (read(fd, line, sizeof(line)) > 0) {
454   - *value = atoi(line);
455   - err = 0;
456   - }
457   -
458   - close(fd);
459   - return err;
460   -}
461   -
462 445 int filename__read_str(const char *filename, char **buf, size_t *sizep)
463 446 {
464 447 size_t size = 0, alloc_size = 0;
465 448  
... ... @@ -523,16 +506,9 @@
523 506  
524 507 int perf_event_paranoid(void)
525 508 {
526   - char path[PATH_MAX];
527   - const char *procfs = procfs__mountpoint();
528 509 int value;
529 510  
530   - if (!procfs)
531   - return INT_MAX;
532   -
533   - scnprintf(path, PATH_MAX, "%s/sys/kernel/perf_event_paranoid", procfs);
534   -
535   - if (filename__read_int(path, &value))
  511 + if (sysctl__read_int("kernel/perf_event_paranoid", &value))
536 512 return INT_MAX;
537 513  
538 514 return value;
tools/perf/util/util.h
... ... @@ -153,7 +153,7 @@
153 153 extern void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN);
154 154  
155 155 extern int prefixcmp(const char *str, const char *prefix);
156   -extern void set_buildid_dir(void);
  156 +extern void set_buildid_dir(const char *dir);
157 157  
158 158 static inline const char *skip_prefix(const char *str, const char *prefix)
159 159 {
... ... @@ -269,35 +269,6 @@
269 269 #define _STR(x) #x
270 270 #define STR(x) _STR(x)
271 271  
272   -/*
273   - * Determine whether some value is a power of two, where zero is
274   - * *not* considered a power of two.
275   - */
276   -
277   -static inline __attribute__((const))
278   -bool is_power_of_2(unsigned long n)
279   -{
280   - return (n != 0 && ((n & (n - 1)) == 0));
281   -}
282   -
283   -static inline unsigned next_pow2(unsigned x)
284   -{
285   - if (!x)
286   - return 1;
287   - return 1ULL << (32 - __builtin_clz(x - 1));
288   -}
289   -
290   -static inline unsigned long next_pow2_l(unsigned long x)
291   -{
292   -#if BITS_PER_LONG == 64
293   - if (x <= (1UL << 31))
294   - return next_pow2(x);
295   - return (unsigned long)next_pow2(x >> 32) << 32;
296   -#else
297   - return next_pow2(x);
298   -#endif
299   -}
300   -
301 272 size_t hex_width(u64 v);
302 273 int hex2u64(const char *ptr, u64 *val);
303 274  
304 275  
... ... @@ -339,11 +310,10 @@
339 310 struct dso;
340 311 struct symbol;
341 312  
342   -char *get_srcline(struct dso *dso, unsigned long addr, struct symbol *sym,
  313 +char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
343 314 bool show_sym);
344 315 void free_srcline(char *srcline);
345 316  
346   -int filename__read_int(const char *filename, int *value);
347 317 int filename__read_str(const char *filename, char **buf, size_t *sizep);
348 318 int perf_event_paranoid(void);
349 319  
tools/thermal/tmon/sysfs.c
... ... @@ -446,7 +446,7 @@
446 446 return -1;
447 447 }
448 448  
449   - ptdata.tzi = calloc(sizeof(struct tz_info), ptdata.max_tz_instance+1);
  449 + ptdata.tzi = calloc(ptdata.max_tz_instance+1, sizeof(struct tz_info));
450 450 if (!ptdata.tzi) {
451 451 fprintf(stderr, "Err: allocate tz_info\n");
452 452 return -1;
... ... @@ -454,8 +454,8 @@
454 454  
455 455 /* we still show thermal zone information if there is no cdev */
456 456 if (ptdata.nr_cooling_dev) {
457   - ptdata.cdi = calloc(sizeof(struct cdev_info),
458   - ptdata.max_cdev_instance + 1);
  457 + ptdata.cdi = calloc(ptdata.max_cdev_instance + 1,
  458 + sizeof(struct cdev_info));
459 459 if (!ptdata.cdi) {
460 460 free(ptdata.tzi);
461 461 fprintf(stderr, "Err: allocate cdev_info\n");