Blame view
tools/perf/bench/mem-functions.c
8.6 KB
827f3b497
|
1 2 3 |
/*
 * mem-functions.c
 *
 * Simple memcpy() and memset() benchmarks
 *
 * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
 */
827f3b497
|
8 |
|
c2a218c63
|
9 |
#include "debug.h" |
827f3b497
|
10 11 |
#include "../perf.h" #include "../util/util.h" |
4b6ab94ea
|
12 |
#include <subcmd/parse-options.h> |
827f3b497
|
13 |
#include "../util/header.h" |
57480d2cd
|
14 |
#include "../util/cloexec.h" |
a067558e2
|
15 |
#include "../util/string2.h" |
827f3b497
|
16 |
#include "bench.h" |
49ce8fc65
|
17 |
#include "mem-memcpy-arch.h" |
5bce1a577
|
18 |
#include "mem-memset-arch.h" |
827f3b497
|
19 20 21 22 23 24 |
#include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/time.h> #include <errno.h> |
f2b91be73
|
25 |
#include <linux/time64.h> |
827f3b497
|
26 27 |
/* Step between throughput units (bytes -> KB -> MB -> GB) used by print_bps(). */
#define K 1024

/*
 * Benchmark settings. The string/integer/boolean ones are filled in by
 * parse_options() through the options[] table; the values below are the
 * defaults used when the corresponding option is not given.
 */
static const char	*size_str	= "1MB";	/* -s / --size */
static const char	*function_str	= "all";	/* -f / --function */
static int		nr_loops	= 1;		/* -l / --nr_loops */
static bool		use_cycles;			/* -c / --cycles */

/* Descriptor returned by sys_perf_event_open() in init_cycles(). */
static int		cycles_fd;
827f3b497
|
33 34 |
static const struct option options[] = { |
b0d22e52e
|
35 |
OPT_STRING('s', "size", &size_str, "1MB", |
a69b4f741
|
36 |
"Specify the size of the memory buffers. " |
13b1fdce8
|
37 |
"Available units: B, KB, MB, GB and TB (case insensitive)"), |
2f211c84a
|
38 39 |
OPT_STRING('f', "function", &function_str, "all", "Specify the function to run, \"all\" runs all available functions, \"help\" lists them"), |
13b1fdce8
|
40 |
|
b0d22e52e
|
41 42 |
OPT_INTEGER('l', "nr_loops", &nr_loops, "Specify the number of loops to run. (default: 1)"), |
13b1fdce8
|
43 |
|
b14f2d357
|
44 45 |
OPT_BOOLEAN('c', "cycles", &use_cycles, "Use a cycles event instead of gettimeofday() to measure performance"), |
13b1fdce8
|
46 |
|
827f3b497
|
47 48 |
OPT_END() }; |
49ce8fc65
|
49 |
/* Pointer to a routine with the same signature as memcpy(). */
typedef void *(*memcpy_t)(void *, const void *, size_t);

/* Pointer to a routine with the same signature as memset(). */
typedef void *(*memset_t)(void *, int, size_t);

/*
 * One entry of a benchmark function table.
 *
 * Tables of these are terminated by an entry whose name is NULL.
 */
struct function {
	const char	*name;	/* identifier matched against --function */
	const char	*desc;	/* human readable description */

	/* The routine under test; which member is valid depends on the table. */
	union {
		memcpy_t	memcpy;
		memset_t	memset;
	} fn;
};
17d7a1123
|
60 |
static struct perf_event_attr cycle_attr = { |
12eac0bf0
|
61 62 |
.type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES |
827f3b497
|
63 |
}; |
c2a218c63
|
64 |
static int init_cycles(void) |
827f3b497
|
65 |
{ |
b14f2d357
|
66 |
cycles_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag()); |
12eac0bf0
|
67 |
|
c2a218c63
|
68 69 70 71 72 73 74 |
if (cycles_fd < 0 && errno == ENOSYS) { pr_debug("No CONFIG_PERF_EVENTS=y kernel support configured? "); return -1; } return cycles_fd; |
827f3b497
|
75 |
} |
b14f2d357
|
76 |
static u64 get_cycles(void) |
827f3b497
|
77 78 79 |
{ int ret; u64 clk; |
b14f2d357
|
80 |
ret = read(cycles_fd, &clk, sizeof(u64)); |
827f3b497
|
81 82 83 84 85 86 87 |
BUG_ON(ret != sizeof(u64)); return clk; } static double timeval2double(struct timeval *ts) { |
f2b91be73
|
88 |
return (double)ts->tv_sec + (double)ts->tv_usec / (double)USEC_PER_SEC; |
827f3b497
|
89 |
} |
6db175c73
|
90 91 |
/*
 * Print a throughput value given in bytes per second, scaled to
 * bytes, KB, MB or GB per second depending on its magnitude (K is the
 * step between units).
 *
 * The argument is converted to double and evaluated exactly once, so
 * expressions with side effects are safe to pass.
 */
#define print_bps(x) do {						\
		double bps_ = (x);					\
									\
		if (bps_ < K)						\
			printf(" %14lf bytes/sec ", bps_);		\
		else if (bps_ < K * K)					\
			printf(" %14lf KB/sec ", bps_ / K);		\
		else if (bps_ < K * K * K)				\
			printf(" %14lf MB/sec ", bps_ / K / K);		\
		else							\
			printf(" %14lf GB/sec ", bps_ / K / K / K);	\
	} while (0)
308197b94
|
104 |
struct bench_mem_info { |
2f211c84a
|
105 |
const struct function *functions; |
47b5757ba
|
106 107 |
u64 (*do_cycles)(const struct function *r, size_t size, void *src, void *dst); double (*do_gettimeofday)(const struct function *r, size_t size, void *src, void *dst); |
308197b94
|
108 |
const char *const *usage; |
47b5757ba
|
109 |
bool alloc_src; |
308197b94
|
110 |
}; |
2f211c84a
|
111 |
static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t size, double size_total) |
827f3b497
|
112 |
{ |
2f211c84a
|
113 |
const struct function *r = &info->functions[r_idx]; |
6db175c73
|
114 |
double result_bps = 0.0; |
b14f2d357
|
115 |
u64 result_cycles = 0; |
47b5757ba
|
116 |
void *src = NULL, *dst = zalloc(size); |
49ce8fc65
|
117 |
|
2f211c84a
|
118 119 |
printf("# function '%s' (%s) ", r->name, r->desc); |
827f3b497
|
120 |
|
47b5757ba
|
121 122 123 124 125 126 127 128 |
if (dst == NULL) goto out_alloc_failed; if (info->alloc_src) { src = zalloc(size); if (src == NULL) goto out_alloc_failed; } |
49ce8fc65
|
129 |
if (bench_format == BENCH_FORMAT_DEFAULT) |
13b1fdce8
|
130 131 132 |
printf("# Copying %s bytes ... ", size_str); |
827f3b497
|
133 |
|
b14f2d357
|
134 |
if (use_cycles) { |
47b5757ba
|
135 |
result_cycles = info->do_cycles(r, size, src, dst); |
827f3b497
|
136 |
} else { |
47b5757ba
|
137 |
result_bps = info->do_gettimeofday(r, size, src, dst); |
827f3b497
|
138 139 140 141 |
} switch (bench_format) { case BENCH_FORMAT_DEFAULT: |
b14f2d357
|
142 |
if (use_cycles) { |
13b1fdce8
|
143 144 |
printf(" %14lf cycles/byte ", (double)result_cycles/size_total); |
49ce8fc65
|
145 |
} else { |
6db175c73
|
146 |
print_bps(result_bps); |
827f3b497
|
147 148 |
} break; |
6db175c73
|
149 |
|
827f3b497
|
150 |
case BENCH_FORMAT_SIMPLE: |
b14f2d357
|
151 |
if (use_cycles) { |
a69b4f741
|
152 153 |
printf("%lf ", (double)result_cycles/size_total); |
49ce8fc65
|
154 |
} else { |
6db175c73
|
155 156 |
printf("%lf ", result_bps); |
49ce8fc65
|
157 |
} |
827f3b497
|
158 |
break; |
6db175c73
|
159 |
|
827f3b497
|
160 |
default: |
6db175c73
|
161 |
BUG_ON(1); |
827f3b497
|
162 163 |
break; } |
47b5757ba
|
164 165 166 167 168 169 170 171 172 |
out_free: free(src); free(dst); return; out_alloc_failed: printf("# Memory allocation failed - maybe size (%s) is too large? ", size_str); goto out_free; |
515e23f01
|
173 |
} |
2946f59ac
|
174 |
static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info) |
515e23f01
|
175 176 |
{ int i; |
a69b4f741
|
177 178 |
size_t size; double size_total; |
515e23f01
|
179 |
|
13839ec49
|
180 |
argc = parse_options(argc, argv, options, info->usage, 0); |
515e23f01
|
181 |
|
c2a218c63
|
182 183 184 185 186 187 188 189 |
if (use_cycles) { i = init_cycles(); if (i < 0) { fprintf(stderr, "Failed to open cycles counter "); return i; } } |
515e23f01
|
190 |
|
a69b4f741
|
191 |
size = (size_t)perf_atoll((char *)size_str); |
b0d22e52e
|
192 |
size_total = (double)size * nr_loops; |
515e23f01
|
193 |
|
a69b4f741
|
194 195 196 |
if ((s64)size <= 0) { fprintf(stderr, "Invalid size:%s ", size_str); |
515e23f01
|
197 198 |
return 1; } |
2f211c84a
|
199 200 201 |
if (!strncmp(function_str, "all", 3)) { for (i = 0; info->functions[i].name; i++) __bench_mem_function(info, i, size, size_total); |
dfecb95cd
|
202 203 |
return 0; } |
2f211c84a
|
204 205 |
for (i = 0; info->functions[i].name; i++) { if (!strcmp(info->functions[i].name, function_str)) |
515e23f01
|
206 207 |
break; } |
2f211c84a
|
208 209 210 211 212 213 214 |
if (!info->functions[i].name) { if (strcmp(function_str, "help") && strcmp(function_str, "h")) printf("Unknown function: %s ", function_str); printf("Available functions: "); for (i = 0; info->functions[i].name; i++) { |
515e23f01
|
215 216 |
printf("\t%s ... %s ", |
2f211c84a
|
217 |
info->functions[i].name, info->functions[i].desc); |
515e23f01
|
218 219 220 |
} return 1; } |
2f211c84a
|
221 |
__bench_mem_function(info, i, size, size_total); |
827f3b497
|
222 223 224 |
return 0; } |
308197b94
|
225 |
|
47b5757ba
|
226 |
static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst) |
308197b94
|
227 228 |
{ u64 cycle_start = 0ULL, cycle_end = 0ULL; |
308197b94
|
229 230 |
memcpy_t fn = r->fn.memcpy; int i; |
47b5757ba
|
231 232 |
/* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */ memset(src, 0, size); |
308197b94
|
233 |
|
6db175c73
|
234 235 236 237 |
/* * We prefault the freshly allocated memory range here, * to not measure page fault overhead: */ |
a69b4f741
|
238 |
fn(dst, src, size); |
308197b94
|
239 |
|
b14f2d357
|
240 |
cycle_start = get_cycles(); |
b0d22e52e
|
241 |
for (i = 0; i < nr_loops; ++i) |
a69b4f741
|
242 |
fn(dst, src, size); |
b14f2d357
|
243 |
cycle_end = get_cycles(); |
308197b94
|
244 |
|
308197b94
|
245 246 |
return cycle_end - cycle_start; } |
47b5757ba
|
247 |
static double do_memcpy_gettimeofday(const struct function *r, size_t size, void *src, void *dst) |
308197b94
|
248 249 250 |
{ struct timeval tv_start, tv_end, tv_diff; memcpy_t fn = r->fn.memcpy; |
308197b94
|
251 |
int i; |
6db175c73
|
252 253 254 255 |
/* * We prefault the freshly allocated memory range here, * to not measure page fault overhead: */ |
a69b4f741
|
256 |
fn(dst, src, size); |
308197b94
|
257 258 |
BUG_ON(gettimeofday(&tv_start, NULL)); |
b0d22e52e
|
259 |
for (i = 0; i < nr_loops; ++i) |
a69b4f741
|
260 |
fn(dst, src, size); |
308197b94
|
261 262 263 |
BUG_ON(gettimeofday(&tv_end, NULL)); timersub(&tv_end, &tv_start, &tv_diff); |
b0d22e52e
|
264 |
return (double)(((double)size * nr_loops) / timeval2double(&tv_diff)); |
308197b94
|
265 |
} |
2f211c84a
|
266 |
/*
 * memcpy() implementations available to the benchmark. The list ends
 * with an entry whose name is NULL.
 */
struct function memcpy_functions[] = {
	{
		.name		= "default",
		.desc		= "Default memcpy() provided by glibc",
		.fn.memcpy	= memcpy
	},

#ifdef HAVE_ARCH_X86_64_SUPPORT
	/* Each MEMCPY_FN() line in the included header becomes one entry. */
# define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn},
# include "mem-memcpy-x86-64-asm-def.h"
# undef MEMCPY_FN
#endif

	{ .name = NULL, }
};

/* Usage text passed to parse_options() for the memcpy benchmark. */
static const char * const bench_mem_memcpy_usage[] = {
	"perf bench mem memcpy <options>",
	NULL
};
b0ad8ea66
|
283 |
int bench_mem_memcpy(int argc, const char **argv) |
308197b94
|
284 285 |
{ struct bench_mem_info info = { |
2f211c84a
|
286 |
.functions = memcpy_functions, |
b14f2d357
|
287 |
.do_cycles = do_memcpy_cycles, |
13839ec49
|
288 289 |
.do_gettimeofday = do_memcpy_gettimeofday, .usage = bench_mem_memcpy_usage, |
47b5757ba
|
290 |
.alloc_src = true, |
308197b94
|
291 |
}; |
2946f59ac
|
292 |
return bench_mem_common(argc, argv, &info); |
308197b94
|
293 |
} |
5bce1a577
|
294 |
|
47b5757ba
|
295 |
/*
 * Time nr_loops calls of the memset variant in @r with the cycles
 * counter and return the counter delta. @src is not used.
 */
static u64 do_memset_cycles(const struct function *r, size_t size, void *src __maybe_unused, void *dst)
{
	memset_t fill = r->fn.memset;
	u64 begin, end;
	int loop;

	/*
	 * One untimed call touches the freshly allocated range so that
	 * page fault overhead is not part of the measurement.
	 */
	fill(dst, -1, size);

	begin = get_cycles();
	/* The loop index doubles as the fill value. */
	for (loop = 0; loop < nr_loops; loop++)
		fill(dst, loop, size);
	end = get_cycles();

	return end - begin;
}
47b5757ba
|
313 |
/*
 * Time nr_loops calls of the memset variant in @r with gettimeofday()
 * and return the achieved throughput in bytes per second. @src is not
 * used.
 */
static double do_memset_gettimeofday(const struct function *r, size_t size, void *src __maybe_unused, void *dst)
{
	struct timeval started, finished, elapsed;
	memset_t fill = r->fn.memset;
	double bytes_done;
	int loop;

	/*
	 * One untimed call touches the freshly allocated range so that
	 * page fault overhead is not part of the measurement.
	 */
	fill(dst, -1, size);

	BUG_ON(gettimeofday(&started, NULL));
	/* The loop index doubles as the fill value. */
	for (loop = 0; loop < nr_loops; loop++)
		fill(dst, loop, size);
	BUG_ON(gettimeofday(&finished, NULL));

	timersub(&finished, &started, &elapsed);

	bytes_done = (double)size * nr_loops;

	return (double)(bytes_done / timeval2double(&elapsed));
}

/* Usage text passed to parse_options() for the memset benchmark. */
static const char * const bench_mem_memset_usage[] = {
	"perf bench mem memset <options>",
	NULL
};
2f211c84a
|
337 |
/*
 * memset() implementations available to the benchmark. The list ends
 * with an entry whose name is NULL.
 */
static const struct function memset_functions[] = {
	{
		.name		= "default",
		.desc		= "Default memset() provided by glibc",
		.fn.memset	= memset
	},

#ifdef HAVE_ARCH_X86_64_SUPPORT
	/* Each MEMSET_FN() line in the included header becomes one entry. */
# define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn },
# include "mem-memset-x86-64-asm-def.h"
# undef MEMSET_FN
#endif

	{ .name = NULL, }
};
b0ad8ea66
|
349 |
int bench_mem_memset(int argc, const char **argv) |
5bce1a577
|
350 351 |
{ struct bench_mem_info info = { |
2f211c84a
|
352 |
.functions = memset_functions, |
b14f2d357
|
353 |
.do_cycles = do_memset_cycles, |
13839ec49
|
354 355 |
.do_gettimeofday = do_memset_gettimeofday, .usage = bench_mem_memset_usage, |
5bce1a577
|
356 |
}; |
2946f59ac
|
357 |
return bench_mem_common(argc, argv, &info); |
5bce1a577
|
358 |
} |