Commit 936be50306a92356367f330ef9d44f1f62478d22

Authored by David Ahern
Committed by Arnaldo Carvalho de Melo
1 parent 6bb8f311a8

perf tool: Fix endianness handling of u32 data in samples

Currently, when analyzing PPC data files on x86, the cpu field is always 0
and the tid and pid are swapped. For example, when analyzing a PPC file on
PPC, the pid/tid fields show:

        rsyslogd  1210/1212

and analyzing the same PPC file using an x86 perf binary shows:

        rsyslogd  1212/1210

The problem is that the swap_op method for samples is
perf_event__all64_swap which assumes all elements in the sample_data
struct are u64s. cpu, tid and pid are u32s and need to be handled
individually. Given that the swap is done before the sample is parsed,
the simplest solution is to undo the 64-bit swap of those elements when
the sample is parsed and do the proper swap.

The RAW data field is generic and perf cannot have programmatic knowledge
of how to treat that data. Instead a warning is given to the user.

Thanks to Anton Blanchard for providing a data file for a multi-CPU
PPC system so I could verify the fix for the CPU fields.

v3 -> v4:
- fixed use of WARN_ONCE

v2 -> v3:
- used WARN_ONCE for message regarding raw data
- removed struct wrapper around union
- fixed whitespace issues

v1 -> v2:
- added a union for undoing the byte-swap on u64 and redoing swap on
  u32's to address compiler errors (see git commit 65014ab3)

Cc: Anton Blanchard <anton@samba.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1315321946-16993-1-git-send-email-dsahern@gmail.com
Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

Showing 4 changed files with 47 additions and 14 deletions Side-by-side Diff

tools/perf/builtin-test.c
... ... @@ -561,7 +561,7 @@
561 561 }
562 562  
563 563 err = perf_event__parse_sample(event, attr.sample_type, sample_size,
564   - false, &sample);
  564 + false, &sample, false);
565 565 if (err) {
566 566 pr_err("Can't parse sample, err = %d\n", err);
567 567 goto out_munmap;
tools/perf/util/event.h
... ... @@ -186,7 +186,7 @@
186 186  
187 187 int perf_event__parse_sample(const union perf_event *event, u64 type,
188 188 int sample_size, bool sample_id_all,
189   - struct perf_sample *sample);
  189 + struct perf_sample *sample, bool swapped);
190 190  
191 191 #endif /* __PERF_RECORD_H */
tools/perf/util/evsel.c
... ... @@ -7,6 +7,8 @@
7 7 * Released under the GPL v2. (and only v2, not any later version)
8 8 */
9 9  
  10 +#include <byteswap.h>
  11 +#include "asm/bug.h"
10 12 #include "evsel.h"
11 13 #include "evlist.h"
12 14 #include "util.h"
13 15  
... ... @@ -342,10 +344,20 @@
342 344  
343 345 int perf_event__parse_sample(const union perf_event *event, u64 type,
344 346 int sample_size, bool sample_id_all,
345   - struct perf_sample *data)
  347 + struct perf_sample *data, bool swapped)
346 348 {
347 349 const u64 *array;
348 350  
  351 + /*
  352 + * used for cross-endian analysis. See git commit 65014ab3
  353 + * for why this goofiness is needed.
  354 + */
  355 + union {
  356 + u64 val64;
  357 + u32 val32[2];
  358 + } u;
  359 +
  360 +
349 361 data->cpu = data->pid = data->tid = -1;
350 362 data->stream_id = data->id = data->time = -1ULL;
351 363  
... ... @@ -366,9 +378,16 @@
366 378 }
367 379  
368 380 if (type & PERF_SAMPLE_TID) {
369   - u32 *p = (u32 *)array;
370   - data->pid = p[0];
371   - data->tid = p[1];
  381 + u.val64 = *array;
  382 + if (swapped) {
  383 + /* undo swap of u64, then swap on individual u32s */
  384 + u.val64 = bswap_64(u.val64);
  385 + u.val32[0] = bswap_32(u.val32[0]);
  386 + u.val32[1] = bswap_32(u.val32[1]);
  387 + }
  388 +
  389 + data->pid = u.val32[0];
  390 + data->tid = u.val32[1];
372 391 array++;
373 392 }
374 393  
... ... @@ -395,8 +414,15 @@
395 414 }
396 415  
397 416 if (type & PERF_SAMPLE_CPU) {
398   - u32 *p = (u32 *)array;
399   - data->cpu = *p;
  417 +
  418 + u.val64 = *array;
  419 + if (swapped) {
  420 + /* undo swap of u64, then swap on individual u32s */
  421 + u.val64 = bswap_64(u.val64);
  422 + u.val32[0] = bswap_32(u.val32[0]);
  423 + }
  424 +
  425 + data->cpu = u.val32[0];
400 426 array++;
401 427 }
402 428  
403 429  
404 430  
405 431  
... ... @@ -423,18 +449,24 @@
423 449 }
424 450  
425 451 if (type & PERF_SAMPLE_RAW) {
426   - u32 *p = (u32 *)array;
  452 + u.val64 = *array;
  453 + if (WARN_ONCE(swapped,
  454 + "Endianness of raw data not corrected!\n")) {
  455 + /* undo swap of u64, then swap on individual u32s */
  456 + u.val64 = bswap_64(u.val64);
  457 + u.val32[0] = bswap_32(u.val32[0]);
  458 + u.val32[1] = bswap_32(u.val32[1]);
  459 + }
427 460  
428 461 if (sample_overlap(event, array, sizeof(u32)))
429 462 return -EFAULT;
430 463  
431   - data->raw_size = *p;
432   - p++;
  464 + data->raw_size = u.val32[0];
433 465  
434   - if (sample_overlap(event, p, data->raw_size))
  466 + if (sample_overlap(event, &u.val32[1], data->raw_size))
435 467 return -EFAULT;
436 468  
437   - data->raw_data = p;
  469 + data->raw_data = &u.val32[1];
438 470 }
439 471  
440 472 return 0;
tools/perf/util/session.h
... ... @@ -162,7 +162,8 @@
162 162 {
163 163 return perf_event__parse_sample(event, session->sample_type,
164 164 session->sample_size,
165   - session->sample_id_all, sample);
  165 + session->sample_id_all, sample,
  166 + session->header.needs_swap);
166 167 }
167 168  
168 169 struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,