Commit 1474855d0878cced6f39f51f3c2bd7428b44cb1e

Authored by Bob Nelson
Committed by Arnd Bergmann
1 parent 36aaccc1e9

[CELL] oprofile: add support to OProfile for profiling CELL BE SPUs

From: Maynard Johnson <mpjohn@us.ibm.com>

This patch updates the existing arch/powerpc/oprofile/op_model_cell.c
to add SPU profiling capabilities.  In addition, a 'cell' subdirectory
was added to arch/powerpc/oprofile to hold Cell-specific SPU profiling
code.  It exports spu_set_profile_private_kref and
spu_get_profile_private_kref, which are used by OProfile to store
private profile information in spufs data structures.
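
For reference, the kref accessors are used from the new spu_task_sync.c
along these lines (a minimal sketch of the pattern in this patch, shown
out of context):

	/* hand OProfile's cached_info refcount to the spufs context */
	spu_set_profile_private_kref(spu->ctx, &info->cache_ref,
				     destroy_cached_info);

	/* later, recover the cached_info from the spufs context */
	struct kref *ref = spu_get_profile_private_kref(the_spu->ctx);
	if (ref)
		info = container_of(ref, struct cached_info, cache_ref);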

Also incorporated several fixes from other patches (rrn): check the
pointer returned from kzalloc, eliminate an unnecessary cast, improve
error handling and cleanup in the related area, and fix a 64-bit
unsigned long parameter that was being demoted to a 32-bit unsigned int
and eventually promoted back to unsigned long.

Signed-off-by: Carl Love <carll@us.ibm.com>
Signed-off-by: Maynard Johnson <mpjohn@us.ibm.com>
Signed-off-by: Bob Nelson <rrnelson@us.ibm.com>
Signed-off-by: Arnd Bergmann <arnd.bergmann@de.ibm.com>
Acked-by: Paul Mackerras <paulus@samba.org>

Showing 26 changed files with 1828 additions and 133 deletions

arch/powerpc/configs/cell_defconfig
... ... @@ -1455,7 +1455,8 @@
1455 1455 # Instrumentation Support
1456 1456 #
1457 1457 CONFIG_PROFILING=y
1458   -CONFIG_OPROFILE=y
  1458 +CONFIG_OPROFILE=m
  1459 +CONFIG_OPROFILE_CELL=y
1459 1460 # CONFIG_KPROBES is not set
1460 1461  
1461 1462 #
arch/powerpc/kernel/time.c
... ... @@ -122,6 +122,7 @@
122 122 static long timezone_offset;
123 123  
124 124 unsigned long ppc_proc_freq;
  125 +EXPORT_SYMBOL(ppc_proc_freq);
125 126 unsigned long ppc_tb_freq;
126 127  
127 128 static u64 tb_last_jiffy __cacheline_aligned_in_smp;
arch/powerpc/oprofile/Kconfig
... ... @@ -14,4 +14,12 @@
14 14 and applications.
15 15  
16 16 If unsure, say N.
  17 +
  18 +config OPROFILE_CELL
  19 + bool "OProfile for Cell Broadband Engine"
  20 + depends on (SPU_FS = y && OPROFILE = m) || (SPU_FS = y && OPROFILE = y) || (SPU_FS = m && OPROFILE = m)
  21 + default y
  22 + help
  23 + Profiling of Cell BE SPUs requires special support enabled
  24 + by this option.
arch/powerpc/oprofile/Makefile
... ... @@ -11,7 +11,9 @@
11 11 timer_int.o )
12 12  
13 13 oprofile-y := $(DRIVER_OBJS) common.o backtrace.o
14   -oprofile-$(CONFIG_PPC_CELL_NATIVE) += op_model_cell.o
  14 +oprofile-$(CONFIG_OPROFILE_CELL) += op_model_cell.o \
  15 + cell/spu_profiler.o cell/vma_map.o \
  16 + cell/spu_task_sync.o
15 17 oprofile-$(CONFIG_PPC64) += op_model_rs64.o op_model_power4.o op_model_pa6t.o
16 18 oprofile-$(CONFIG_FSL_BOOKE) += op_model_fsl_booke.o
17 19 oprofile-$(CONFIG_6xx) += op_model_7450.o
arch/powerpc/oprofile/cell/pr_util.h
  1 + /*
  2 + * Cell Broadband Engine OProfile Support
  3 + *
  4 + * (C) Copyright IBM Corporation 2006
  5 + *
  6 + * Author: Maynard Johnson <maynardj@us.ibm.com>
  7 + *
  8 + * This program is free software; you can redistribute it and/or
  9 + * modify it under the terms of the GNU General Public License
  10 + * as published by the Free Software Foundation; either version
  11 + * 2 of the License, or (at your option) any later version.
  12 + */
  13 +
  14 +#ifndef PR_UTIL_H
  15 +#define PR_UTIL_H
  16 +
  17 +#include <linux/cpumask.h>
  18 +#include <linux/oprofile.h>
  19 +#include <asm/cell-pmu.h>
  20 +#include <asm/spu.h>
  21 +
  22 +#include "../../platforms/cell/cbe_regs.h"
  23 +
  24 +/* Defines used for sync_start */
  25 +#define SKIP_GENERIC_SYNC 0
  26 +#define SYNC_START_ERROR -1
  27 +#define DO_GENERIC_SYNC 1
  28 +
  29 +struct spu_overlay_info { /* map of sections within an SPU overlay */
  30 + unsigned int vma; /* SPU virtual memory address from elf */
  31 + unsigned int size; /* size of section from elf */
  32 + unsigned int offset; /* offset of section into elf file */
  33 + unsigned int buf;
  34 +};
  35 +
  36 +struct vma_to_fileoffset_map { /* map of sections within an SPU program */
  37 + struct vma_to_fileoffset_map *next; /* list pointer */
  38 + unsigned int vma; /* SPU virtual memory address from elf */
  39 + unsigned int size; /* size of section from elf */
  40 + unsigned int offset; /* offset of section into elf file */
  41 + unsigned int guard_ptr;
  42 + unsigned int guard_val;
  43 + /*
  44 + * The guard pointer is an entry in the _ovly_buf_table,
  45 + * computed using ovly.buf as the index into the table. Since
  46 + * ovly.buf values begin at '1' to reference the first (or 0th)
  47 + * entry in the _ovly_buf_table, the computation subtracts 1
  48 + * from ovly.buf.
  49 + * The guard value is stored in the _ovly_buf_table entry and
  50 + * is an index (starting at 1) back to the _ovly_table entry
  51 + * that is pointing at this _ovly_buf_table entry. So, for
  52 + * example, for an overlay scenario with one overlay segment
  53 + * and two overlay sections:
  54 + * - Section 1 points to the first entry of the
  55 + * _ovly_buf_table, which contains a guard value
  56 + * of '1', referencing the first (index=0) entry of
  57 + * _ovly_table.
  58 + * - Section 2 points to the second entry of the
  59 + * _ovly_buf_table, which contains a guard value
  60 + * of '2', referencing the second (index=1) entry of
  61 + * _ovly_table.
  62 + */
  63 +
  64 +};
  65 +
  66 +/* The three functions below are for maintaining and accessing
  67 + * the vma-to-fileoffset map.
  68 + */
  69 +struct vma_to_fileoffset_map *create_vma_map(const struct spu *spu,
  70 + u64 objectid);
  71 +unsigned int vma_map_lookup(struct vma_to_fileoffset_map *map,
  72 + unsigned int vma, const struct spu *aSpu,
  73 + int *grd_val);
  74 +void vma_map_free(struct vma_to_fileoffset_map *map);
  75 +
  76 +/*
  77 + * Entry point for SPU profiling.
  78 + * cycles_reset is the SPU_CYCLES count value specified by the user.
  79 + */
  80 +int start_spu_profiling(unsigned int cycles_reset);
  81 +
  82 +void stop_spu_profiling(void);
  83 +
  84 +
  85 +/* add the necessary profiling hooks */
  86 +int spu_sync_start(void);
  87 +
  88 +/* remove the hooks */
  89 +int spu_sync_stop(void);
  90 +
  91 +/* Record SPU program counter samples to the oprofile event buffer. */
  92 +void spu_sync_buffer(int spu_num, unsigned int *samples,
  93 + int num_samples);
  94 +
  95 +void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset);
  96 +
  97 +#endif /* PR_UTIL_H */
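
The header above defines the interface between the Cell oprofile model
and the new SPU profiling code. A sketch of the expected driver flow
when SPU_CYCLES profiling is selected (assumed sequence; the actual
calls live in op_model_cell.c later in this patch):

	/* assumed flow, error handling elided */
	set_spu_profiling_frequency(khz, spu_cycle_reset);
	ret = spu_sync_start();		/* hook SPUFS context switches */
	if (ret != SYNC_START_ERROR)
		ret = start_spu_profiling(spu_cycle_reset);
	/* ... profiling runs ... */
	stop_spu_profiling();
	spu_sync_stop();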
arch/powerpc/oprofile/cell/spu_profiler.c
  1 +/*
  2 + * Cell Broadband Engine OProfile Support
  3 + *
  4 + * (C) Copyright IBM Corporation 2006
  5 + *
  6 + * Authors: Maynard Johnson <maynardj@us.ibm.com>
  7 + * Carl Love <carll@us.ibm.com>
  8 + *
  9 + * This program is free software; you can redistribute it and/or
  10 + * modify it under the terms of the GNU General Public License
  11 + * as published by the Free Software Foundation; either version
  12 + * 2 of the License, or (at your option) any later version.
  13 + */
  14 +
  15 +#include <linux/hrtimer.h>
  16 +#include <linux/smp.h>
  17 +#include <linux/slab.h>
  18 +#include <asm/cell-pmu.h>
  19 +#include "pr_util.h"
  20 +
  21 +#define TRACE_ARRAY_SIZE 1024
  22 +#define SCALE_SHIFT 14
  23 +
  24 +static u32 *samples;
  25 +
  26 +static int spu_prof_running;
  27 +static unsigned int profiling_interval;
  28 +
  29 +#define NUM_SPU_BITS_TRBUF 16
  30 +#define SPUS_PER_TB_ENTRY 4
  31 +#define SPUS_PER_NODE 8
  32 +
  33 +#define SPU_PC_MASK 0xFFFF
  34 +
  35 +static DEFINE_SPINLOCK(sample_array_lock);
  36 +unsigned long sample_array_lock_flags;
  37 +
  38 +void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset)
  39 +{
  40 + unsigned long ns_per_cyc;
  41 +
  42 + if (!freq_khz)
  43 + freq_khz = ppc_proc_freq/1000;
  44 +
  45 + /* To calculate a timeout in nanoseconds, the basic
  46 + * formula is ns = cycles_reset * (NSEC_PER_SEC / cpu frequency).
  47 + * To avoid floating point math, we use the scale math
  48 + * technique as described in linux/jiffies.h. We use
  49 + * a scale factor of SCALE_SHIFT, which provides 4 decimal places
  50 + * of precision. This is close enough for the purpose at hand.
  51 + *
  52 + * The value of the timeout should be small enough that the hw
   53 + * trace buffer will not get more than about 1/3 full for the
   54 + * maximum user-specified (the LFSR value) hw sampling frequency.
  55 + * This is to ensure the trace buffer will never fill even if the
  56 + * kernel thread scheduling varies under a heavy system load.
  57 + */
  58 +
  59 + ns_per_cyc = (USEC_PER_SEC << SCALE_SHIFT)/freq_khz;
  60 + profiling_interval = (ns_per_cyc * cycles_reset) >> SCALE_SHIFT;
  61 +
  62 +}
  63 +
  64 +/*
  65 + * Extract SPU PC from trace buffer entry
  66 + */
  67 +static void spu_pc_extract(int cpu, int entry)
  68 +{
  69 + /* the trace buffer is 128 bits */
  70 + u64 trace_buffer[2];
  71 + u64 spu_mask;
  72 + int spu;
  73 +
  74 + spu_mask = SPU_PC_MASK;
  75 +
  76 + /* Each SPU PC is 16 bits; hence, four spus in each of
  77 + * the two 64-bit buffer entries that make up the
  78 + * 128-bit trace_buffer entry. Process two 64-bit values
  79 + * simultaneously.
  80 + * trace[0] SPU PC contents are: 0 1 2 3
  81 + * trace[1] SPU PC contents are: 4 5 6 7
  82 + */
  83 +
  84 + cbe_read_trace_buffer(cpu, trace_buffer);
  85 +
  86 + for (spu = SPUS_PER_TB_ENTRY-1; spu >= 0; spu--) {
  87 + /* spu PC trace entry is upper 16 bits of the
  88 + * 18 bit SPU program counter
  89 + */
  90 + samples[spu * TRACE_ARRAY_SIZE + entry]
  91 + = (spu_mask & trace_buffer[0]) << 2;
  92 + samples[(spu + SPUS_PER_TB_ENTRY) * TRACE_ARRAY_SIZE + entry]
  93 + = (spu_mask & trace_buffer[1]) << 2;
  94 +
  95 + trace_buffer[0] = trace_buffer[0] >> NUM_SPU_BITS_TRBUF;
  96 + trace_buffer[1] = trace_buffer[1] >> NUM_SPU_BITS_TRBUF;
  97 + }
  98 +}
  99 +
  100 +static int cell_spu_pc_collection(int cpu)
  101 +{
  102 + u32 trace_addr;
  103 + int entry;
  104 +
  105 + /* process the collected SPU PC for the node */
  106 +
  107 + entry = 0;
  108 +
  109 + trace_addr = cbe_read_pm(cpu, trace_address);
  110 + while (!(trace_addr & CBE_PM_TRACE_BUF_EMPTY)) {
  111 + /* there is data in the trace buffer to process */
  112 + spu_pc_extract(cpu, entry);
  113 +
  114 + entry++;
  115 +
  116 + if (entry >= TRACE_ARRAY_SIZE)
  117 + /* spu_samples is full */
  118 + break;
  119 +
  120 + trace_addr = cbe_read_pm(cpu, trace_address);
  121 + }
  122 +
  123 + return entry;
  124 +}
  125 +
  126 +
  127 +static enum hrtimer_restart profile_spus(struct hrtimer *timer)
  128 +{
  129 + ktime_t kt;
  130 + int cpu, node, k, num_samples, spu_num;
  131 +
  132 + if (!spu_prof_running)
  133 + goto stop;
  134 +
  135 + for_each_online_cpu(cpu) {
  136 + if (cbe_get_hw_thread_id(cpu))
  137 + continue;
  138 +
  139 + node = cbe_cpu_to_node(cpu);
  140 +
   141 + /* There should only be one kernel thread at a time processing
   142 + * the samples. In the very unlikely case that processing takes
   143 + * a very long time and multiple kernel threads are started to
   144 + * process the samples, make sure only one kernel thread is
   145 + * working on the samples array at a time. The sample array must
   146 + * be loaded and then processed for a given cpu. The sample
   147 + * array is not per cpu.
   148 + */
  149 + spin_lock_irqsave(&sample_array_lock,
  150 + sample_array_lock_flags);
  151 + num_samples = cell_spu_pc_collection(cpu);
  152 +
  153 + if (num_samples == 0) {
  154 + spin_unlock_irqrestore(&sample_array_lock,
  155 + sample_array_lock_flags);
  156 + continue;
  157 + }
  158 +
  159 + for (k = 0; k < SPUS_PER_NODE; k++) {
  160 + spu_num = k + (node * SPUS_PER_NODE);
  161 + spu_sync_buffer(spu_num,
  162 + samples + (k * TRACE_ARRAY_SIZE),
  163 + num_samples);
  164 + }
  165 +
  166 + spin_unlock_irqrestore(&sample_array_lock,
  167 + sample_array_lock_flags);
  168 +
  169 + }
   170 + smp_wmb(); /* ensure spu event buffer updates are written */
  171 + /* don't want events intermingled... */
  172 +
  173 + kt = ktime_set(0, profiling_interval);
  174 + if (!spu_prof_running)
  175 + goto stop;
  176 + hrtimer_forward(timer, timer->base->get_time(), kt);
  177 + return HRTIMER_RESTART;
  178 +
  179 + stop:
  180 + printk(KERN_INFO "SPU_PROF: spu-prof timer ending\n");
  181 + return HRTIMER_NORESTART;
  182 +}
  183 +
  184 +static struct hrtimer timer;
  185 +/*
  186 + * Entry point for SPU profiling.
  187 + * NOTE: SPU profiling is done system-wide, not per-CPU.
  188 + *
  189 + * cycles_reset is the count value specified by the user when
  190 + * setting up OProfile to count SPU_CYCLES.
  191 + */
  192 +int start_spu_profiling(unsigned int cycles_reset)
  193 +{
  194 + ktime_t kt;
  195 +
  196 + pr_debug("timer resolution: %lu\n", TICK_NSEC);
  197 + kt = ktime_set(0, profiling_interval);
  198 + hrtimer_init(&timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  199 + timer.expires = kt;
  200 + timer.function = profile_spus;
  201 +
  202 + /* Allocate arrays for collecting SPU PC samples */
  203 + samples = kzalloc(SPUS_PER_NODE *
  204 + TRACE_ARRAY_SIZE * sizeof(u32), GFP_KERNEL);
  205 +
  206 + if (!samples)
  207 + return -ENOMEM;
  208 +
  209 + spu_prof_running = 1;
  210 + hrtimer_start(&timer, kt, HRTIMER_MODE_REL);
  211 +
  212 + return 0;
  213 +}
  214 +
  215 +void stop_spu_profiling(void)
  216 +{
  217 + spu_prof_running = 0;
  218 + hrtimer_cancel(&timer);
  219 + kfree(samples);
  220 + pr_debug("SPU_PROF: stop_spu_profiling issued\n");
  221 +}
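
To make the scale math in set_spu_profiling_frequency() concrete, a
worked example with illustrative numbers (not taken from the patch):
on a 3.2 GHz Cell with a user-specified cycles_reset of 100000,

	/* freq_khz = 3200000, cycles_reset = 100000, SCALE_SHIFT = 14 */
	ns_per_cyc = (1000000UL << 14) / 3200000;	/* = 5120 */
	/* 5120 / 2^14 = 0.3125 ns per cycle, exact for 3.2 GHz */
	profiling_interval = (5120 * 100000UL) >> 14;	/* = 31250 ns */
	/* the hrtimer fires every ~31 us, i.e. every 100000 SPU cycles */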
arch/powerpc/oprofile/cell/spu_task_sync.c
  1 +/*
  2 + * Cell Broadband Engine OProfile Support
  3 + *
  4 + * (C) Copyright IBM Corporation 2006
  5 + *
  6 + * Author: Maynard Johnson <maynardj@us.ibm.com>
  7 + *
  8 + * This program is free software; you can redistribute it and/or
  9 + * modify it under the terms of the GNU General Public License
  10 + * as published by the Free Software Foundation; either version
  11 + * 2 of the License, or (at your option) any later version.
  12 + */
  13 +
  14 +/* The purpose of this file is to handle SPU event task switching
  15 + * and to record SPU context information into the OProfile
  16 + * event buffer.
  17 + *
  18 + * Additionally, the spu_sync_buffer function is provided as a helper
   19 + * for recording actual SPU program counter samples to the event buffer.
  20 + */
  21 +#include <linux/dcookies.h>
  22 +#include <linux/kref.h>
  23 +#include <linux/mm.h>
  24 +#include <linux/module.h>
  25 +#include <linux/notifier.h>
  26 +#include <linux/numa.h>
  27 +#include <linux/oprofile.h>
  28 +#include <linux/spinlock.h>
  29 +#include "pr_util.h"
  30 +
  31 +#define RELEASE_ALL 9999
  32 +
  33 +static DEFINE_SPINLOCK(buffer_lock);
  34 +static DEFINE_SPINLOCK(cache_lock);
  35 +static int num_spu_nodes;
  36 +int spu_prof_num_nodes;
  37 +int last_guard_val[MAX_NUMNODES * 8];
  38 +
  39 +/* Container for caching information about an active SPU task. */
  40 +struct cached_info {
  41 + struct vma_to_fileoffset_map *map;
  42 + struct spu *the_spu; /* needed to access pointer to local_store */
  43 + struct kref cache_ref;
  44 +};
  45 +
  46 +static struct cached_info *spu_info[MAX_NUMNODES * 8];
  47 +
  48 +static void destroy_cached_info(struct kref *kref)
  49 +{
  50 + struct cached_info *info;
  51 +
  52 + info = container_of(kref, struct cached_info, cache_ref);
  53 + vma_map_free(info->map);
  54 + kfree(info);
  55 + module_put(THIS_MODULE);
  56 +}
  57 +
  58 +/* Return the cached_info for the passed SPU number.
  59 + * ATTENTION: Callers are responsible for obtaining the
  60 + * cache_lock if needed prior to invoking this function.
  61 + */
  62 +static struct cached_info *get_cached_info(struct spu *the_spu, int spu_num)
  63 +{
  64 + struct kref *ref;
  65 + struct cached_info *ret_info;
  66 +
  67 + if (spu_num >= num_spu_nodes) {
  68 + printk(KERN_ERR "SPU_PROF: "
  69 + "%s, line %d: Invalid index %d into spu info cache\n",
  70 + __FUNCTION__, __LINE__, spu_num);
  71 + ret_info = NULL;
  72 + goto out;
  73 + }
  74 + if (!spu_info[spu_num] && the_spu) {
  75 + ref = spu_get_profile_private_kref(the_spu->ctx);
  76 + if (ref) {
  77 + spu_info[spu_num] = container_of(ref, struct cached_info, cache_ref);
  78 + kref_get(&spu_info[spu_num]->cache_ref);
  79 + }
  80 + }
  81 +
  82 + ret_info = spu_info[spu_num];
  83 + out:
  84 + return ret_info;
  85 +}
  86 +
  87 +
  88 +/* Looks for cached info for the passed spu. If not found, the
  89 + * cached info is created for the passed spu.
  90 + * Returns 0 for success; otherwise, -1 for error.
  91 + */
  92 +static int
  93 +prepare_cached_spu_info(struct spu *spu, unsigned long objectId)
  94 +{
  95 + unsigned long flags;
  96 + struct vma_to_fileoffset_map *new_map;
  97 + int retval = 0;
  98 + struct cached_info *info;
  99 +
   100 + /* We won't bother getting cache_lock here since we
   101 + * don't do anything with the cached_info that's returned.
  102 + */
  103 + info = get_cached_info(spu, spu->number);
  104 +
  105 + if (info) {
  106 + pr_debug("Found cached SPU info.\n");
  107 + goto out;
  108 + }
  109 +
  110 + /* Create cached_info and set spu_info[spu->number] to point to it.
  111 + * spu->number is a system-wide value, not a per-node value.
  112 + */
  113 + info = kzalloc(sizeof(struct cached_info), GFP_KERNEL);
  114 + if (!info) {
  115 + printk(KERN_ERR "SPU_PROF: "
  116 + "%s, line %d: create vma_map failed\n",
  117 + __FUNCTION__, __LINE__);
  118 + retval = -ENOMEM;
  119 + goto err_alloc;
  120 + }
  121 + new_map = create_vma_map(spu, objectId);
  122 + if (!new_map) {
  123 + printk(KERN_ERR "SPU_PROF: "
  124 + "%s, line %d: create vma_map failed\n",
  125 + __FUNCTION__, __LINE__);
  126 + retval = -ENOMEM;
  127 + goto err_alloc;
  128 + }
  129 +
  130 + pr_debug("Created vma_map\n");
  131 + info->map = new_map;
  132 + info->the_spu = spu;
  133 + kref_init(&info->cache_ref);
  134 + spin_lock_irqsave(&cache_lock, flags);
  135 + spu_info[spu->number] = info;
  136 + /* Increment count before passing off ref to SPUFS. */
  137 + kref_get(&info->cache_ref);
  138 +
  139 + /* We increment the module refcount here since SPUFS is
  140 + * responsible for the final destruction of the cached_info,
  141 + * and it must be able to access the destroy_cached_info()
  142 + * function defined in the OProfile module. We decrement
  143 + * the module refcount in destroy_cached_info.
  144 + */
  145 + try_module_get(THIS_MODULE);
  146 + spu_set_profile_private_kref(spu->ctx, &info->cache_ref,
  147 + destroy_cached_info);
  148 + spin_unlock_irqrestore(&cache_lock, flags);
  149 + goto out;
  150 +
  151 +err_alloc:
  152 + kfree(info);
  153 +out:
  154 + return retval;
  155 +}
  156 +
  157 +/*
  158 + * NOTE: The caller is responsible for locking the
  159 + * cache_lock prior to calling this function.
  160 + */
  161 +static int release_cached_info(int spu_index)
  162 +{
  163 + int index, end;
  164 +
  165 + if (spu_index == RELEASE_ALL) {
  166 + end = num_spu_nodes;
  167 + index = 0;
  168 + } else {
  169 + if (spu_index >= num_spu_nodes) {
  170 + printk(KERN_ERR "SPU_PROF: "
  171 + "%s, line %d: "
  172 + "Invalid index %d into spu info cache\n",
  173 + __FUNCTION__, __LINE__, spu_index);
  174 + goto out;
  175 + }
  176 + end = spu_index + 1;
  177 + index = spu_index;
  178 + }
  179 + for (; index < end; index++) {
  180 + if (spu_info[index]) {
  181 + kref_put(&spu_info[index]->cache_ref,
  182 + destroy_cached_info);
  183 + spu_info[index] = NULL;
  184 + }
  185 + }
  186 +
  187 +out:
  188 + return 0;
  189 +}
  190 +
  191 +/* The source code for fast_get_dcookie was "borrowed"
  192 + * from drivers/oprofile/buffer_sync.c.
  193 + */
  194 +
  195 +/* Optimisation. We can manage without taking the dcookie sem
  196 + * because we cannot reach this code without at least one
  197 + * dcookie user still being registered (namely, the reader
  198 + * of the event buffer).
  199 + */
  200 +static inline unsigned long fast_get_dcookie(struct dentry *dentry,
  201 + struct vfsmount *vfsmnt)
  202 +{
  203 + unsigned long cookie;
  204 +
  205 + if (dentry->d_cookie)
  206 + return (unsigned long)dentry;
  207 + get_dcookie(dentry, vfsmnt, &cookie);
  208 + return cookie;
  209 +}
  210 +
  211 +/* Look up the dcookie for the task's first VM_EXECUTABLE mapping,
  212 + * which corresponds loosely to "application name". Also, determine
  213 + * the offset for the SPU ELF object. If computed offset is
  214 + * non-zero, it implies an embedded SPU object; otherwise, it's a
   215 + * separate SPU binary, in which case we retrieve its dcookie.
  216 + * For the embedded case, we must determine if SPU ELF is embedded
  217 + * in the executable application or another file (i.e., shared lib).
  218 + * If embedded in a shared lib, we must get the dcookie and return
  219 + * that to the caller.
  220 + */
  221 +static unsigned long
  222 +get_exec_dcookie_and_offset(struct spu *spu, unsigned int *offsetp,
  223 + unsigned long *spu_bin_dcookie,
  224 + unsigned long spu_ref)
  225 +{
  226 + unsigned long app_cookie = 0;
  227 + unsigned int my_offset = 0;
  228 + struct file *app = NULL;
  229 + struct vm_area_struct *vma;
  230 + struct mm_struct *mm = spu->mm;
  231 +
  232 + if (!mm)
  233 + goto out;
  234 +
  235 + down_read(&mm->mmap_sem);
  236 +
  237 + for (vma = mm->mmap; vma; vma = vma->vm_next) {
  238 + if (!vma->vm_file)
  239 + continue;
  240 + if (!(vma->vm_flags & VM_EXECUTABLE))
  241 + continue;
  242 + app_cookie = fast_get_dcookie(vma->vm_file->f_dentry,
  243 + vma->vm_file->f_vfsmnt);
  244 + pr_debug("got dcookie for %s\n",
  245 + vma->vm_file->f_dentry->d_name.name);
  246 + app = vma->vm_file;
  247 + break;
  248 + }
  249 +
  250 + for (vma = mm->mmap; vma; vma = vma->vm_next) {
  251 + if (vma->vm_start > spu_ref || vma->vm_end <= spu_ref)
  252 + continue;
  253 + my_offset = spu_ref - vma->vm_start;
  254 + if (!vma->vm_file)
  255 + goto fail_no_image_cookie;
  256 +
  257 + pr_debug("Found spu ELF at %X(object-id:%lx) for file %s\n",
  258 + my_offset, spu_ref,
  259 + vma->vm_file->f_dentry->d_name.name);
  260 + *offsetp = my_offset;
  261 + break;
  262 + }
  263 +
  264 + *spu_bin_dcookie = fast_get_dcookie(vma->vm_file->f_dentry,
  265 + vma->vm_file->f_vfsmnt);
  266 + pr_debug("got dcookie for %s\n", vma->vm_file->f_dentry->d_name.name);
  267 +
  268 + up_read(&mm->mmap_sem);
  269 +
  270 +out:
  271 + return app_cookie;
  272 +
  273 +fail_no_image_cookie:
  274 + up_read(&mm->mmap_sem);
  275 +
  276 + printk(KERN_ERR "SPU_PROF: "
  277 + "%s, line %d: Cannot find dcookie for SPU binary\n",
  278 + __FUNCTION__, __LINE__);
  279 + goto out;
  280 +}
  281 +
  282 +
  283 +
  284 +/* This function finds or creates cached context information for the
  285 + * passed SPU and records SPU context information into the OProfile
  286 + * event buffer.
  287 + */
  288 +static int process_context_switch(struct spu *spu, unsigned long objectId)
  289 +{
  290 + unsigned long flags;
  291 + int retval;
  292 + unsigned int offset = 0;
  293 + unsigned long spu_cookie = 0, app_dcookie;
  294 +
  295 + retval = prepare_cached_spu_info(spu, objectId);
  296 + if (retval)
  297 + goto out;
  298 +
  299 + /* Get dcookie first because a mutex_lock is taken in that
  300 + * code path, so interrupts must not be disabled.
  301 + */
  302 + app_dcookie = get_exec_dcookie_and_offset(spu, &offset, &spu_cookie, objectId);
  303 + if (!app_dcookie || !spu_cookie) {
  304 + retval = -ENOENT;
  305 + goto out;
  306 + }
  307 +
  308 + /* Record context info in event buffer */
  309 + spin_lock_irqsave(&buffer_lock, flags);
  310 + add_event_entry(ESCAPE_CODE);
  311 + add_event_entry(SPU_CTX_SWITCH_CODE);
  312 + add_event_entry(spu->number);
  313 + add_event_entry(spu->pid);
  314 + add_event_entry(spu->tgid);
  315 + add_event_entry(app_dcookie);
  316 + add_event_entry(spu_cookie);
  317 + add_event_entry(offset);
  318 + spin_unlock_irqrestore(&buffer_lock, flags);
   319 + smp_wmb(); /* ensure spu event buffer updates are written */
  320 + /* don't want entries intermingled... */
  321 +out:
  322 + return retval;
  323 +}
  324 +
  325 +/*
  326 + * This function is invoked on either a bind_context or unbind_context.
  327 + * If called for an unbind_context, the val arg is 0; otherwise,
  328 + * it is the object-id value for the spu context.
  329 + * The data arg is of type 'struct spu *'.
  330 + */
  331 +static int spu_active_notify(struct notifier_block *self, unsigned long val,
  332 + void *data)
  333 +{
  334 + int retval;
  335 + unsigned long flags;
  336 + struct spu *the_spu = data;
  337 +
  338 + pr_debug("SPU event notification arrived\n");
  339 + if (!val) {
  340 + spin_lock_irqsave(&cache_lock, flags);
  341 + retval = release_cached_info(the_spu->number);
  342 + spin_unlock_irqrestore(&cache_lock, flags);
  343 + } else {
  344 + retval = process_context_switch(the_spu, val);
  345 + }
  346 + return retval;
  347 +}
  348 +
  349 +static struct notifier_block spu_active = {
  350 + .notifier_call = spu_active_notify,
  351 +};
  352 +
  353 +static int number_of_online_nodes(void)
  354 +{
  355 + u32 cpu; u32 tmp;
  356 + int nodes = 0;
  357 + for_each_online_cpu(cpu) {
  358 + tmp = cbe_cpu_to_node(cpu) + 1;
  359 + if (tmp > nodes)
  360 + nodes++;
  361 + }
  362 + return nodes;
  363 +}
  364 +
  365 +/* The main purpose of this function is to synchronize
  366 + * OProfile with SPUFS by registering to be notified of
  367 + * SPU task switches.
  368 + *
  369 + * NOTE: When profiling SPUs, we must ensure that only
  370 + * spu_sync_start is invoked and not the generic sync_start
  371 + * in drivers/oprofile/oprof.c. A return value of
  372 + * SKIP_GENERIC_SYNC or SYNC_START_ERROR will
  373 + * accomplish this.
  374 + */
  375 +int spu_sync_start(void)
  376 +{
  377 + int k;
  378 + int ret = SKIP_GENERIC_SYNC;
  379 + int register_ret;
  380 + unsigned long flags = 0;
  381 +
  382 + spu_prof_num_nodes = number_of_online_nodes();
  383 + num_spu_nodes = spu_prof_num_nodes * 8;
  384 +
  385 + spin_lock_irqsave(&buffer_lock, flags);
  386 + add_event_entry(ESCAPE_CODE);
  387 + add_event_entry(SPU_PROFILING_CODE);
  388 + add_event_entry(num_spu_nodes);
  389 + spin_unlock_irqrestore(&buffer_lock, flags);
  390 +
  391 + /* Register for SPU events */
  392 + register_ret = spu_switch_event_register(&spu_active);
  393 + if (register_ret) {
  394 + ret = SYNC_START_ERROR;
  395 + goto out;
  396 + }
  397 +
  398 + for (k = 0; k < (MAX_NUMNODES * 8); k++)
  399 + last_guard_val[k] = 0;
  400 + pr_debug("spu_sync_start -- running.\n");
  401 +out:
  402 + return ret;
  403 +}
  404 +
  405 +/* Record SPU program counter samples to the oprofile event buffer. */
  406 +void spu_sync_buffer(int spu_num, unsigned int *samples,
  407 + int num_samples)
  408 +{
  409 + unsigned long long file_offset;
  410 + unsigned long flags;
  411 + int i;
  412 + struct vma_to_fileoffset_map *map;
  413 + struct spu *the_spu;
  414 + unsigned long long spu_num_ll = spu_num;
  415 + unsigned long long spu_num_shifted = spu_num_ll << 32;
  416 + struct cached_info *c_info;
  417 +
  418 + /* We need to obtain the cache_lock here because it's
  419 + * possible that after getting the cached_info, the SPU job
  420 + * corresponding to this cached_info may end, thus resulting
  421 + * in the destruction of the cached_info.
  422 + */
  423 + spin_lock_irqsave(&cache_lock, flags);
  424 + c_info = get_cached_info(NULL, spu_num);
  425 + if (!c_info) {
  426 + /* This legitimately happens when the SPU task ends before all
  427 + * samples are recorded.
  428 + * No big deal -- so we just drop a few samples.
  429 + */
  430 + pr_debug("SPU_PROF: No cached SPU contex "
  431 + "for SPU #%d. Dropping samples.\n", spu_num);
  432 + goto out;
  433 + }
  434 +
  435 + map = c_info->map;
  436 + the_spu = c_info->the_spu;
  437 + spin_lock(&buffer_lock);
  438 + for (i = 0; i < num_samples; i++) {
  439 + unsigned int sample = *(samples+i);
  440 + int grd_val = 0;
  441 + file_offset = 0;
  442 + if (sample == 0)
  443 + continue;
   444 + file_offset = vma_map_lookup(map, sample, the_spu, &grd_val);
  445 +
  446 + /* If overlays are used by this SPU application, the guard
  447 + * value is non-zero, indicating which overlay section is in
  448 + * use. We need to discard samples taken during the time
   449 + * period in which an overlay swap occurs (i.e., guard value changes).
  450 + */
  451 + if (grd_val && grd_val != last_guard_val[spu_num]) {
  452 + last_guard_val[spu_num] = grd_val;
  453 + /* Drop the rest of the samples. */
  454 + break;
  455 + }
  456 +
  457 + add_event_entry(file_offset | spu_num_shifted);
  458 + }
  459 + spin_unlock(&buffer_lock);
  460 +out:
  461 + spin_unlock_irqrestore(&cache_lock, flags);
  462 +}
  463 +
  464 +
  465 +int spu_sync_stop(void)
  466 +{
  467 + unsigned long flags = 0;
  468 + int ret = spu_switch_event_unregister(&spu_active);
  469 + if (ret) {
  470 + printk(KERN_ERR "SPU_PROF: "
  471 + "%s, line %d: spu_switch_event_unregister returned %d\n",
  472 + __FUNCTION__, __LINE__, ret);
  473 + goto out;
  474 + }
  475 +
  476 + spin_lock_irqsave(&cache_lock, flags);
  477 + ret = release_cached_info(RELEASE_ALL);
  478 + spin_unlock_irqrestore(&cache_lock, flags);
  479 +out:
  480 + pr_debug("spu_sync_stop -- done.\n");
  481 + return ret;
  482 +}
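
Each sample entry that spu_sync_buffer() writes is a single 64-bit word
with the SPU number in the upper 32 bits and the vma-to-file offset in
the lower 32 bits. A sketch of how a post-processing tool might decode
one entry (hypothetical decoder, not part of this patch):

	u64 entry = read_event_buffer_word();	/* hypothetical helper */
	u32 spu_num     = entry >> 32;		/* SPU the PC came from */
	u32 file_offset = entry & 0xffffffff;	/* offset into SPU ELF,
						 * or 0x10000000+vma flag
						 * for unmapped addresses */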
arch/powerpc/oprofile/cell/vma_map.c
  1 +/*
  2 + * Cell Broadband Engine OProfile Support
  3 + *
  4 + * (C) Copyright IBM Corporation 2006
  5 + *
  6 + * Author: Maynard Johnson <maynardj@us.ibm.com>
  7 + *
  8 + * This program is free software; you can redistribute it and/or
  9 + * modify it under the terms of the GNU General Public License
  10 + * as published by the Free Software Foundation; either version
  11 + * 2 of the License, or (at your option) any later version.
  12 + */
  13 +
  14 +/* The code in this source file is responsible for generating
  15 + * vma-to-fileOffset maps for both overlay and non-overlay SPU
  16 + * applications.
  17 + */
  18 +
  19 +#include <linux/mm.h>
  20 +#include <linux/string.h>
  21 +#include <linux/uaccess.h>
  22 +#include <linux/elf.h>
  23 +#include "pr_util.h"
  24 +
  25 +
  26 +void vma_map_free(struct vma_to_fileoffset_map *map)
  27 +{
  28 + while (map) {
  29 + struct vma_to_fileoffset_map *next = map->next;
  30 + kfree(map);
  31 + map = next;
  32 + }
  33 +}
  34 +
  35 +unsigned int
  36 +vma_map_lookup(struct vma_to_fileoffset_map *map, unsigned int vma,
  37 + const struct spu *aSpu, int *grd_val)
  38 +{
  39 + /*
  40 + * Default the offset to the physical address + a flag value.
  41 + * Addresses of dynamically generated code can't be found in the vma
  42 + * map. For those addresses the flagged value will be sent on to
  43 + * the user space tools so they can be reported rather than just
  44 + * thrown away.
  45 + */
  46 + u32 offset = 0x10000000 + vma;
  47 + u32 ovly_grd;
  48 +
  49 + for (; map; map = map->next) {
  50 + if (vma < map->vma || vma >= map->vma + map->size)
  51 + continue;
  52 +
  53 + if (map->guard_ptr) {
  54 + ovly_grd = *(u32 *)(aSpu->local_store + map->guard_ptr);
  55 + if (ovly_grd != map->guard_val)
  56 + continue;
  57 + *grd_val = ovly_grd;
  58 + }
  59 + offset = vma - map->vma + map->offset;
  60 + break;
  61 + }
  62 +
  63 + return offset;
  64 +}
  65 +
  66 +static struct vma_to_fileoffset_map *
  67 +vma_map_add(struct vma_to_fileoffset_map *map, unsigned int vma,
  68 + unsigned int size, unsigned int offset, unsigned int guard_ptr,
  69 + unsigned int guard_val)
  70 +{
  71 + struct vma_to_fileoffset_map *new =
  72 + kzalloc(sizeof(struct vma_to_fileoffset_map), GFP_KERNEL);
  73 + if (!new) {
  74 + printk(KERN_ERR "SPU_PROF: %s, line %d: malloc failed\n",
  75 + __FUNCTION__, __LINE__);
  76 + vma_map_free(map);
  77 + return NULL;
  78 + }
  79 +
  80 + new->next = map;
  81 + new->vma = vma;
  82 + new->size = size;
  83 + new->offset = offset;
  84 + new->guard_ptr = guard_ptr;
  85 + new->guard_val = guard_val;
  86 +
  87 + return new;
  88 +}
  89 +
  90 +
  91 +/* Parse SPE ELF header and generate a list of vma_maps.
  92 + * A pointer to the first vma_map in the generated list
  93 + * of vma_maps is returned. */
  94 +struct vma_to_fileoffset_map *create_vma_map(const struct spu *aSpu,
  95 + unsigned long spu_elf_start)
  96 +{
  97 + static const unsigned char expected[EI_PAD] = {
  98 + [EI_MAG0] = ELFMAG0,
  99 + [EI_MAG1] = ELFMAG1,
  100 + [EI_MAG2] = ELFMAG2,
  101 + [EI_MAG3] = ELFMAG3,
  102 + [EI_CLASS] = ELFCLASS32,
  103 + [EI_DATA] = ELFDATA2MSB,
  104 + [EI_VERSION] = EV_CURRENT,
  105 + [EI_OSABI] = ELFOSABI_NONE
  106 + };
  107 +
  108 + int grd_val;
  109 + struct vma_to_fileoffset_map *map = NULL;
  110 + struct spu_overlay_info ovly;
  111 + unsigned int overlay_tbl_offset = -1;
  112 + unsigned long phdr_start, shdr_start;
  113 + Elf32_Ehdr ehdr;
  114 + Elf32_Phdr phdr;
  115 + Elf32_Shdr shdr, shdr_str;
  116 + Elf32_Sym sym;
  117 + int i, j;
  118 + char name[32];
  119 +
  120 + unsigned int ovly_table_sym = 0;
  121 + unsigned int ovly_buf_table_sym = 0;
  122 + unsigned int ovly_table_end_sym = 0;
  123 + unsigned int ovly_buf_table_end_sym = 0;
  124 + unsigned long ovly_table;
  125 + unsigned int n_ovlys;
  126 +
  127 + /* Get and validate ELF header. */
  128 +
  129 + if (copy_from_user(&ehdr, (void *) spu_elf_start, sizeof (ehdr)))
  130 + goto fail;
  131 +
  132 + if (memcmp(ehdr.e_ident, expected, EI_PAD) != 0) {
  133 + printk(KERN_ERR "SPU_PROF: "
  134 + "%s, line %d: Unexpected e_ident parsing SPU ELF\n",
  135 + __FUNCTION__, __LINE__);
  136 + goto fail;
  137 + }
  138 + if (ehdr.e_machine != EM_SPU) {
  139 + printk(KERN_ERR "SPU_PROF: "
  140 + "%s, line %d: Unexpected e_machine parsing SPU ELF\n",
  141 + __FUNCTION__, __LINE__);
  142 + goto fail;
  143 + }
  144 + if (ehdr.e_type != ET_EXEC) {
  145 + printk(KERN_ERR "SPU_PROF: "
  146 + "%s, line %d: Unexpected e_type parsing SPU ELF\n",
  147 + __FUNCTION__, __LINE__);
  148 + goto fail;
  149 + }
  150 + phdr_start = spu_elf_start + ehdr.e_phoff;
  151 + shdr_start = spu_elf_start + ehdr.e_shoff;
  152 +
  153 + /* Traverse program headers. */
  154 + for (i = 0; i < ehdr.e_phnum; i++) {
  155 + if (copy_from_user(&phdr,
  156 + (void *) (phdr_start + i * sizeof(phdr)),
  157 + sizeof(phdr)))
  158 + goto fail;
  159 +
  160 + if (phdr.p_type != PT_LOAD)
  161 + continue;
  162 + if (phdr.p_flags & (1 << 27))
  163 + continue;
  164 +
  165 + map = vma_map_add(map, phdr.p_vaddr, phdr.p_memsz,
  166 + phdr.p_offset, 0, 0);
  167 + if (!map)
  168 + goto fail;
  169 + }
  170 +
  171 + pr_debug("SPU_PROF: Created non-overlay maps\n");
  172 + /* Traverse section table and search for overlay-related symbols. */
  173 + for (i = 0; i < ehdr.e_shnum; i++) {
  174 + if (copy_from_user(&shdr,
  175 + (void *) (shdr_start + i * sizeof(shdr)),
  176 + sizeof(shdr)))
  177 + goto fail;
  178 +
  179 + if (shdr.sh_type != SHT_SYMTAB)
  180 + continue;
  181 + if (shdr.sh_entsize != sizeof (sym))
  182 + continue;
  183 +
  184 + if (copy_from_user(&shdr_str,
  185 + (void *) (shdr_start + shdr.sh_link *
  186 + sizeof(shdr)),
  187 + sizeof(shdr)))
  188 + goto fail;
  189 +
  190 + if (shdr_str.sh_type != SHT_STRTAB)
   191 + goto fail;
  192 +
  193 + for (j = 0; j < shdr.sh_size / sizeof (sym); j++) {
  194 + if (copy_from_user(&sym, (void *) (spu_elf_start +
  195 + shdr.sh_offset + j *
  196 + sizeof (sym)),
  197 + sizeof (sym)))
  198 + goto fail;
  199 +
  200 + if (copy_from_user(name, (void *)
  201 + (spu_elf_start + shdr_str.sh_offset +
  202 + sym.st_name),
  203 + 20))
  204 + goto fail;
  205 +
  206 + if (memcmp(name, "_ovly_table", 12) == 0)
  207 + ovly_table_sym = sym.st_value;
  208 + if (memcmp(name, "_ovly_buf_table", 16) == 0)
  209 + ovly_buf_table_sym = sym.st_value;
  210 + if (memcmp(name, "_ovly_table_end", 16) == 0)
  211 + ovly_table_end_sym = sym.st_value;
  212 + if (memcmp(name, "_ovly_buf_table_end", 20) == 0)
  213 + ovly_buf_table_end_sym = sym.st_value;
  214 + }
  215 + }
  216 +
  217 + /* If we don't have overlays, we're done. */
  218 + if (ovly_table_sym == 0 || ovly_buf_table_sym == 0
  219 + || ovly_table_end_sym == 0 || ovly_buf_table_end_sym == 0) {
  220 + pr_debug("SPU_PROF: No overlay table found\n");
  221 + goto out;
  222 + } else {
  223 + pr_debug("SPU_PROF: Overlay table found\n");
  224 + }
  225 +
  226 + /* The _ovly_table symbol represents a table with one entry
  227 + * per overlay section. The _ovly_buf_table symbol represents
  228 + * a table with one entry per overlay region.
  229 + * The struct spu_overlay_info gives the structure of the _ovly_table
  230 + * entries. The structure of _ovly_table_buf is simply one
  231 + * u32 word per entry.
  232 + */
  233 + overlay_tbl_offset = vma_map_lookup(map, ovly_table_sym,
  234 + aSpu, &grd_val);
  235 + if (overlay_tbl_offset < 0) {
  236 + printk(KERN_ERR "SPU_PROF: "
  237 + "%s, line %d: Error finding SPU overlay table\n",
  238 + __FUNCTION__, __LINE__);
  239 + goto fail;
  240 + }
  241 + ovly_table = spu_elf_start + overlay_tbl_offset;
  242 +
  243 + n_ovlys = (ovly_table_end_sym -
  244 + ovly_table_sym) / sizeof (ovly);
  245 +
  246 + /* Traverse overlay table. */
  247 + for (i = 0; i < n_ovlys; i++) {
  248 + if (copy_from_user(&ovly, (void *)
  249 + (ovly_table + i * sizeof (ovly)),
  250 + sizeof (ovly)))
  251 + goto fail;
  252 +
  253 + /* The ovly.vma/size/offset arguments are analogous to the same
  254 + * arguments used above for non-overlay maps. The final two
  255 + * args are referred to as the guard pointer and the guard
  256 + * value.
  257 + * The guard pointer is an entry in the _ovly_buf_table,
  258 + * computed using ovly.buf as the index into the table. Since
  259 + * ovly.buf values begin at '1' to reference the first (or 0th)
  260 + * entry in the _ovly_buf_table, the computation subtracts 1
  261 + * from ovly.buf.
  262 + * The guard value is stored in the _ovly_buf_table entry and
  263 + * is an index (starting at 1) back to the _ovly_table entry
  264 + * that is pointing at this _ovly_buf_table entry. So, for
  265 + * example, for an overlay scenario with one overlay segment
  266 + * and two overlay sections:
  267 + * - Section 1 points to the first entry of the
  268 + * _ovly_buf_table, which contains a guard value
  269 + * of '1', referencing the first (index=0) entry of
  270 + * _ovly_table.
  271 + * - Section 2 points to the second entry of the
  272 + * _ovly_buf_table, which contains a guard value
  273 + * of '2', referencing the second (index=1) entry of
  274 + * _ovly_table.
  275 + */
  276 + map = vma_map_add(map, ovly.vma, ovly.size, ovly.offset,
  277 + ovly_buf_table_sym + (ovly.buf-1) * 4, i+1);
  278 + if (!map)
  279 + goto fail;
  280 + }
  281 + goto out;
  282 +
  283 + fail:
  284 + map = NULL;
  285 + out:
  286 + return map;
  287 +}
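
To make the guard computation concrete: entry i of _ovly_table using
overlay buffer ovly.buf gets guard_ptr = ovly_buf_table_sym +
(ovly.buf - 1) * 4 and guard_val = i + 1. A worked example with
illustrative numbers (not taken from the patch):

	/* suppose ovly_buf_table_sym = 0x3000 (hypothetical) */
	/* second overlay section: i = 1, ovly.buf = 1 */
	guard_ptr = 0x3000 + (1 - 1) * 4;	/* first _ovly_buf_table slot */
	guard_val = 1 + 1;			/* slot must hold 2 for this
						 * section to be resident */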
arch/powerpc/oprofile/common.c
... ... @@ -29,6 +29,8 @@
29 29 static struct op_counter_config ctr[OP_MAX_COUNTER];
30 30 static struct op_system_config sys;
31 31  
  32 +static int op_per_cpu_rc;
  33 +
32 34 static void op_handle_interrupt(struct pt_regs *regs)
33 35 {
34 36 model->handle_interrupt(regs, ctr);
35 37  
36 38  
37 39  
38 40  
... ... @@ -36,25 +38,41 @@
36 38  
37 39 static void op_powerpc_cpu_setup(void *dummy)
38 40 {
39   - model->cpu_setup(ctr);
  41 + int ret;
  42 +
  43 + ret = model->cpu_setup(ctr);
  44 +
  45 + if (ret != 0)
  46 + op_per_cpu_rc = ret;
40 47 }
41 48  
42 49 static int op_powerpc_setup(void)
43 50 {
44 51 int err;
45 52  
  53 + op_per_cpu_rc = 0;
  54 +
46 55 /* Grab the hardware */
47 56 err = reserve_pmc_hardware(op_handle_interrupt);
48 57 if (err)
49 58 return err;
50 59  
51 60 /* Pre-compute the values to stuff in the hardware registers. */
52   - model->reg_setup(ctr, &sys, model->num_counters);
  61 + op_per_cpu_rc = model->reg_setup(ctr, &sys, model->num_counters);
53 62  
54   - /* Configure the registers on all cpus. */
  63 + if (op_per_cpu_rc)
  64 + goto out;
  65 +
  66 + /* Configure the registers on all cpus. If an error occurs on one
  67 + * of the cpus, op_per_cpu_rc will be set to the error */
55 68 on_each_cpu(op_powerpc_cpu_setup, NULL, 0, 1);
56 69  
57   - return 0;
  70 +out: if (op_per_cpu_rc) {
  71 + /* on setup error, release the performance counter hardware */
  72 + release_pmc_hardware();
  73 + }
  74 +
  75 + return op_per_cpu_rc;
58 76 }
59 77  
60 78 static void op_powerpc_shutdown(void)
61 79  
62 80  
63 81  
... ... @@ -64,16 +82,29 @@
64 82  
65 83 static void op_powerpc_cpu_start(void *dummy)
66 84 {
67   - model->start(ctr);
  85 + /* If any of the cpus returns an error, set the
  86 + * global flag to the error so it can be returned
  87 + * to the generic OProfile caller.
  88 + */
  89 + int ret;
  90 +
  91 + ret = model->start(ctr);
  92 + if (ret != 0)
  93 + op_per_cpu_rc = ret;
68 94 }
69 95  
70 96 static int op_powerpc_start(void)
71 97 {
  98 + op_per_cpu_rc = 0;
  99 +
72 100 if (model->global_start)
73   - model->global_start(ctr);
74   - if (model->start)
  101 + return model->global_start(ctr);
  102 + if (model->start) {
75 103 on_each_cpu(op_powerpc_cpu_start, NULL, 0, 1);
76   - return 0;
  104 + return op_per_cpu_rc;
  105 + }
  106 + return -EIO; /* No start function is defined for this
  107 + power architecture */
77 108 }
78 109  
79 110 static inline void op_powerpc_cpu_stop(void *dummy)
80 111  
... ... @@ -147,11 +178,13 @@
147 178  
148 179 switch (cur_cpu_spec->oprofile_type) {
149 180 #ifdef CONFIG_PPC64
150   -#ifdef CONFIG_PPC_CELL_NATIVE
  181 +#ifdef CONFIG_OPROFILE_CELL
151 182 case PPC_OPROFILE_CELL:
152 183 if (firmware_has_feature(FW_FEATURE_LPAR))
153 184 return -ENODEV;
154 185 model = &op_model_cell;
  186 + ops->sync_start = model->sync_start;
  187 + ops->sync_stop = model->sync_stop;
155 188 break;
156 189 #endif
157 190 case PPC_OPROFILE_RS64:
arch/powerpc/oprofile/op_model_7450.c
... ... @@ -81,7 +81,7 @@
81 81  
82 82 /* Configures the counters on this CPU based on the global
83 83 * settings */
84   -static void fsl7450_cpu_setup(struct op_counter_config *ctr)
  84 +static int fsl7450_cpu_setup(struct op_counter_config *ctr)
85 85 {
86 86 /* freeze all counters */
87 87 pmc_stop_ctrs();
88 88  
... ... @@ -89,12 +89,14 @@
89 89 mtspr(SPRN_MMCR0, mmcr0_val);
90 90 mtspr(SPRN_MMCR1, mmcr1_val);
91 91 mtspr(SPRN_MMCR2, mmcr2_val);
  92 +
  93 + return 0;
92 94 }
93 95  
94 96 #define NUM_CTRS 6
95 97  
96 98 /* Configures the global settings for the counters on all CPUs. */
97   -static void fsl7450_reg_setup(struct op_counter_config *ctr,
  99 +static int fsl7450_reg_setup(struct op_counter_config *ctr,
98 100 struct op_system_config *sys,
99 101 int num_ctrs)
100 102 {
101 103  
... ... @@ -126,10 +128,12 @@
126 128 | mmcr1_event6(ctr[5].event);
127 129  
128 130 mmcr2_val = 0;
  131 +
  132 + return 0;
129 133 }
130 134  
131 135 /* Sets the counters on this CPU to the chosen values, and starts them */
132   -static void fsl7450_start(struct op_counter_config *ctr)
  136 +static int fsl7450_start(struct op_counter_config *ctr)
133 137 {
134 138 int i;
135 139  
... ... @@ -148,6 +152,8 @@
148 152 pmc_start_ctrs();
149 153  
150 154 oprofile_running = 1;
  155 +
  156 + return 0;
151 157 }
152 158  
153 159 /* Stop the counters on this CPU */
... ... @@ -193,7 +199,7 @@
193 199 /* The freeze bit was set by the interrupt. */
194 200 /* Clear the freeze bit, and reenable the interrupt.
195 201 * The counters won't actually start until the rfi clears
196   - * the PMM bit */
  202 + * the PM/M bit */
197 203 pmc_start_ctrs();
198 204 }
199 205  
arch/powerpc/oprofile/op_model_cell.c
... ... @@ -5,8 +5,8 @@
5 5 *
6 6 * Author: David Erb (djerb@us.ibm.com)
7 7 * Modifications:
8   - * Carl Love <carll@us.ibm.com>
9   - * Maynard Johnson <maynardj@us.ibm.com>
  8 + * Carl Love <carll@us.ibm.com>
  9 + * Maynard Johnson <maynardj@us.ibm.com>
10 10 *
11 11 * This program is free software; you can redistribute it and/or
12 12 * modify it under the terms of the GNU General Public License
13 13  
14 14  
... ... @@ -38,12 +38,25 @@
38 38  
39 39 #include "../platforms/cell/interrupt.h"
40 40 #include "../platforms/cell/cbe_regs.h"
  41 +#include "cell/pr_util.h"
41 42  
  43 +static void cell_global_stop_spu(void);
  44 +
  45 +/*
  46 + * spu_cycle_reset is the number of cycles between samples.
  47 + * This variable is used for SPU profiling and should ONLY be set
  48 + * at the beginning of cell_reg_setup; otherwise, it's read-only.
  49 + */
  50 +static unsigned int spu_cycle_reset;
  51 +
  52 +#define NUM_SPUS_PER_NODE 8
  53 +#define SPU_CYCLES_EVENT_NUM 2 /* event number for SPU_CYCLES */
  54 +
42 55 #define PPU_CYCLES_EVENT_NUM 1 /* event number for CYCLES */
43   -#define PPU_CYCLES_GRP_NUM 1 /* special group number for identifying
44   - * PPU_CYCLES event
45   - */
46   -#define CBE_COUNT_ALL_CYCLES 0x42800000 /* PPU cycle event specifier */
  56 +#define PPU_CYCLES_GRP_NUM 1 /* special group number for identifying
  57 + * PPU_CYCLES event
  58 + */
  59 +#define CBE_COUNT_ALL_CYCLES 0x42800000 /* PPU cycle event specifier */
47 60  
48 61 #define NUM_THREADS 2 /* number of physical threads in
49 62 * physical processor
... ... @@ -51,6 +64,7 @@
51 64 #define NUM_TRACE_BUS_WORDS 4
52 65 #define NUM_INPUT_BUS_WORDS 2
53 66  
  67 +#define MAX_SPU_COUNT 0xFFFFFF /* maximum 24 bit LFSR value */
54 68  
55 69 struct pmc_cntrl_data {
56 70 unsigned long vcntr;
57 71  
... ... @@ -62,11 +76,10 @@
62 76 /*
63 77 * ibm,cbe-perftools rtas parameters
64 78 */
65   -
66 79 struct pm_signal {
67 80 u16 cpu; /* Processor to modify */
68   - u16 sub_unit; /* hw subunit this applies to (if applicable) */
69   - short int signal_group; /* Signal Group to Enable/Disable */
  81 + u16 sub_unit; /* hw subunit this applies to (if applicable)*/
  82 + short int signal_group; /* Signal Group to Enable/Disable */
70 83 u8 bus_word; /* Enable/Disable on this Trace/Trigger/Event
71 84 * Bus Word(s) (bitmask)
72 85 */
73 86  
74 87  
75 88  
... ... @@ -112,21 +125,42 @@
112 125  
113 126 static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS];
114 127  
115   -/* Interpetation of hdw_thread:
  128 +/*
  129 + * The CELL profiling code makes rtas calls to setup the debug bus to
  130 + * route the performance signals. Additionally, SPU profiling requires
  131 + * a second rtas call to setup the hardware to capture the SPU PCs.
  132 + * The EIO error value is returned if the token lookups or the rtas
  133 + * call fail. The EIO error number is the best choice of the existing
  134 + * error numbers. The probability of rtas related error is very low. But
  135 + * by returning EIO and printing additional information to dmsg the user
  136 + * will know that OProfile did not start and dmesg will tell them why.
  137 + * OProfile does not support returning errors on Stop. Not a huge issue
  138 + * since failure to reset the debug bus or stop the SPU PC collection is
  139 + * not a fatel issue. Chances are if the Stop failed, Start doesn't work
  140 + * either.
  141 + */
  142 +
  143 +/*
  144 + * Interpretation of hdw_thread:
116 145 * 0 - even virtual cpus 0, 2, 4,...
117 146 * 1 - odd virtual cpus 1, 3, 5, ...
  147 + *
  148 + * FIXME: this is strictly wrong, we need to clean this up in a number
  149 + * of places. It works for now. -arnd
118 150 */
119 151 static u32 hdw_thread;
120 152  
121 153 static u32 virt_cntr_inter_mask;
122 154 static struct timer_list timer_virt_cntr;
123 155  
124   -/* pm_signal needs to be global since it is initialized in
  156 +/*
  157 + * pm_signal needs to be global since it is initialized in
125 158 * cell_reg_setup at the time when the necessary information
126 159 * is available.
127 160 */
128 161 static struct pm_signal pm_signal[NR_PHYS_CTRS];
129   -static int pm_rtas_token;
  162 +static int pm_rtas_token; /* token for debug bus setup call */
  163 +static int spu_rtas_token; /* token for SPU cycle profiling */
130 164  
131 165 static u32 reset_value[NR_PHYS_CTRS];
132 166 static int num_counters;
... ... @@ -147,8 +181,8 @@
147 181 {
148 182 u64 paddr = __pa(address);
149 183  
150   - return rtas_call(pm_rtas_token, 5, 1, NULL, subfunc, passthru,
151   - paddr >> 32, paddr & 0xffffffff, length);
  184 + return rtas_call(pm_rtas_token, 5, 1, NULL, subfunc,
  185 + passthru, paddr >> 32, paddr & 0xffffffff, length);
152 186 }
153 187  
154 188 static void pm_rtas_reset_signals(u32 node)
... ... @@ -156,12 +190,13 @@
156 190 int ret;
157 191 struct pm_signal pm_signal_local;
158 192  
159   - /* The debug bus is being set to the passthru disable state.
160   - * However, the FW still expects atleast one legal signal routing
161   - * entry or it will return an error on the arguments. If we don't
162   - * supply a valid entry, we must ignore all return values. Ignoring
163   - * all return values means we might miss an error we should be
164   - * concerned about.
  193 + /*
  194 + * The debug bus is being set to the passthru disable state.
  195 + * However, the FW still expects at least one legal signal routing
  196 + * entry or it will return an error on the arguments. If we don't
  197 + * supply a valid entry, we must ignore all return values. Ignoring
  198 + * all return values means we might miss an error we should be
  199 + * concerned about.
165 200 */
166 201  
167 202 /* fw expects physical cpu #. */
168 203  
169 204  
... ... @@ -175,18 +210,24 @@
175 210 &pm_signal_local,
176 211 sizeof(struct pm_signal));
177 212  
178   - if (ret)
  213 + if (unlikely(ret))
  214 + /*
  215 + * Not a fatal error. For Oprofile stop, the oprofile
  216 + * functions do not support returning an error for
  217 + * failure to stop OProfile.
  218 + */
179 219 printk(KERN_WARNING "%s: rtas returned: %d\n",
180 220 __FUNCTION__, ret);
181 221 }
182 222  
183   -static void pm_rtas_activate_signals(u32 node, u32 count)
  223 +static int pm_rtas_activate_signals(u32 node, u32 count)
184 224 {
185 225 int ret;
186 226 int i, j;
187 227 struct pm_signal pm_signal_local[NR_PHYS_CTRS];
188 228  
189   - /* There is no debug setup required for the cycles event.
  229 + /*
  230 + * There is no debug setup required for the cycles event.
190 231 * Note that only events in the same group can be used.
191 232 * Otherwise, there will be conflicts in correctly routing
192 233 * the signals on the debug bus. It is the responsibility
193 234  
194 235  
... ... @@ -213,10 +254,14 @@
213 254 pm_signal_local,
214 255 i * sizeof(struct pm_signal));
215 256  
216   - if (ret)
  257 + if (unlikely(ret)) {
217 258 printk(KERN_WARNING "%s: rtas returned: %d\n",
218 259 __FUNCTION__, ret);
  260 + return -EIO;
  261 + }
219 262 }
  263 +
  264 + return 0;
220 265 }
221 266  
222 267 /*
223 268  
... ... @@ -260,11 +305,12 @@
260 305 pm_regs.pm07_cntrl[ctr] |= PM07_CTR_POLARITY(polarity);
261 306 pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_CONTROL(input_control);
262 307  
263   - /* Some of the islands signal selection is based on 64 bit words.
  308 + /*
  309 + * Some of the islands signal selection is based on 64 bit words.
264 310 * The debug bus words are 32 bits, the input words to the performance
265 311 * counters are defined as 32 bits. Need to convert the 64 bit island
266 312 * specification to the appropriate 32 input bit and bus word for the
267   - * performance counter event selection. See the CELL Performance
  313 + * performance counter event selection. See the CELL Performance
268 314 * monitoring signals manual and the Perf cntr hardware descriptions
269 315 * for the details.
270 316 */
... ... @@ -298,6 +344,7 @@
298 344 input_bus[j] = i;
299 345 pm_regs.group_control |=
300 346 (i << (31 - i));
  347 +
301 348 break;
302 349 }
303 350 }
... ... @@ -309,7 +356,8 @@
309 356  
310 357 static void write_pm_cntrl(int cpu)
311 358 {
312   - /* Oprofile will use 32 bit counters, set bits 7:10 to 0
  359 + /*
  360 + * Oprofile will use 32 bit counters, set bits 7:10 to 0
313 361 * pmregs.pm_cntrl is a global
314 362 */
315 363  
... ... @@ -326,7 +374,8 @@
326 374 if (pm_regs.pm_cntrl.freeze == 1)
327 375 val |= CBE_PM_FREEZE_ALL_CTRS;
328 376  
329   - /* Routine set_count_mode must be called previously to set
  377 + /*
  378 + * Routine set_count_mode must be called previously to set
330 379 * the count mode based on the user selection of user and kernel.
331 380 */
332 381 val |= CBE_PM_COUNT_MODE_SET(pm_regs.pm_cntrl.count_mode);
... ... @@ -336,7 +385,8 @@
336 385 static inline void
337 386 set_count_mode(u32 kernel, u32 user)
338 387 {
339   - /* The user must specify user and kernel if they want them. If
  388 + /*
  389 + * The user must specify user and kernel if they want them. If
340 390 * neither is specified, OProfile will count in hypervisor mode.
341 391 * pm_regs.pm_cntrl is a global
342 392 */
... ... @@ -364,7 +414,7 @@
364 414  
365 415 /*
366 416 * Oprofile is expected to collect data on all CPUs simultaneously.
367   - * However, there is one set of performance counters per node. There are
  417 + * However, there is one set of performance counters per node. There are
368 418 * two hardware threads or virtual CPUs on each node. Hence, OProfile must
369 419 * multiplex in time the performance counter collection on the two virtual
370 420 * CPUs. The multiplexing of the performance counters is done by this
371 421  
372 422  
... ... @@ -377,19 +427,19 @@
377 427 * pair of per-cpu arrays is used for storing the previous and next
378 428 * pmc values for a given node.
379 429 * NOTE: We use the per-cpu variable to improve cache performance.
  430 + *
  431 + * This routine will alternate loading the virtual counters for
  432 + * virtual CPUs
380 433 */
381 434 static void cell_virtual_cntr(unsigned long data)
382 435 {
383   - /* This routine will alternate loading the virtual counters for
384   - * virtual CPUs
385   - */
386 436 int i, prev_hdw_thread, next_hdw_thread;
387 437 u32 cpu;
388 438 unsigned long flags;
389 439  
390   - /* Make sure that the interrupt_hander and
391   - * the virt counter are not both playing with
392   - * the counters on the same node.
  440 + /*
  441 + * Make sure that the interrupt_handler and the virt counter are
  442 + * not both playing with the counters on the same node.
393 443 */
394 444  
395 445 spin_lock_irqsave(&virt_cntr_lock, flags);
396 446  
397 447  
398 448  
... ... @@ -400,22 +450,25 @@
400 450 hdw_thread = 1 ^ hdw_thread;
401 451 next_hdw_thread = hdw_thread;
402 452  
403   - for (i = 0; i < num_counters; i++)
404   - /* There are some per thread events. Must do the
  453 + /*
  454 + * There are some per thread events. Must do the
405 455 * set event, for the thread that is being started
406 456 */
  457 + for (i = 0; i < num_counters; i++)
407 458 set_pm_event(i,
408 459 pmc_cntrl[next_hdw_thread][i].evnts,
409 460 pmc_cntrl[next_hdw_thread][i].masks);
410 461  
411   - /* The following is done only once per each node, but
  462 + /*
  463 + * The following is done only once per each node, but
412 464 * we need cpu #, not node #, to pass to the cbe_xxx functions.
413 465 */
414 466 for_each_online_cpu(cpu) {
415 467 if (cbe_get_hw_thread_id(cpu))
416 468 continue;
417 469  
418   - /* stop counters, save counter values, restore counts
  470 + /*
  471 + * stop counters, save counter values, restore counts
419 472 * for previous thread
420 473 */
421 474 cbe_disable_pm(cpu);
... ... @@ -428,7 +481,7 @@
428 481 == 0xFFFFFFFF)
429 482 /* If the cntr value is 0xffffffff, we must
430 483 * reset that to 0xfffffff0 when the current
431   - * thread is restarted. This will generate a
  484 + * thread is restarted. This will generate a
432 485 * new interrupt and make sure that we never
433 486 * restore the counters to the max value. If
434 487 * the counters were restored to the max value,
435 488  
... ... @@ -444,13 +497,15 @@
444 497 next_hdw_thread)[i]);
445 498 }
446 499  
447   - /* Switch to the other thread. Change the interrupt
  500 + /*
  501 + * Switch to the other thread. Change the interrupt
448 502 * and control regs to be scheduled on the CPU
449 503 * corresponding to the thread to execute.
450 504 */
451 505 for (i = 0; i < num_counters; i++) {
452 506 if (pmc_cntrl[next_hdw_thread][i].enabled) {
453   - /* There are some per thread events.
  507 + /*
  508 + * There are some per thread events.
454 509 * Must do the set event, enable_cntr
455 510 * for each cpu.
456 511 */
457 512  
458 513  
459 514  
460 515  
... ... @@ -482,17 +537,42 @@
482 537 }
483 538  
484 539 /* This function is called once for all cpus combined */
485   -static void
486   -cell_reg_setup(struct op_counter_config *ctr,
487   - struct op_system_config *sys, int num_ctrs)
  540 +static int cell_reg_setup(struct op_counter_config *ctr,
  541 + struct op_system_config *sys, int num_ctrs)
488 542 {
489 543 int i, j, cpu;
  544 + spu_cycle_reset = 0;
490 545  
  546 + if (ctr[0].event == SPU_CYCLES_EVENT_NUM) {
  547 + spu_cycle_reset = ctr[0].count;
  548 +
  549 + /*
  550 + * Each node will need to make the rtas call to start
  551 + * and stop SPU profiling. Get the token once and store it.
  552 + */
  553 + spu_rtas_token = rtas_token("ibm,cbe-spu-perftools");
  554 +
  555 + if (unlikely(spu_rtas_token == RTAS_UNKNOWN_SERVICE)) {
  556 + printk(KERN_ERR
  557 + "%s: rtas token ibm,cbe-spu-perftools unknown\n",
  558 + __FUNCTION__);
  559 + return -EIO;
  560 + }
  561 + }
  562 +
491 563 pm_rtas_token = rtas_token("ibm,cbe-perftools");
492   - if (pm_rtas_token == RTAS_UNKNOWN_SERVICE) {
493   - printk(KERN_WARNING "%s: RTAS_UNKNOWN_SERVICE\n",
  564 +
  565 + /*
  566 + * For all events except PPU CYCLES, each node will need to make
  567 + * the rtas cbe-perftools call to set up and reset the debug bus.
  568 + * Make the token lookup call once and store it in the global
  569 + * variable pm_rtas_token.
  570 + */
  571 + if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) {
  572 + printk(KERN_ERR
  573 + "%s: rtas token ibm,cbe-perftools unknown\n",
494 574 __FUNCTION__);
495   - goto out;
  575 + return -EIO;
496 576 }
497 577  
498 578 num_counters = num_ctrs;
... ... @@ -520,7 +600,8 @@
520 600 per_cpu(pmc_values, j)[i] = 0;
521 601 }
522 602  
523   - /* Setup the thread 1 events, map the thread 0 event to the
  603 + /*
  604 + * Set up the thread 1 events, map the thread 0 event to the
524 605 * equivalent thread 1 event.
525 606 */
526 607 for (i = 0; i < num_ctrs; ++i) {
527 608  
... ... @@ -544,9 +625,10 @@
544 625 for (i = 0; i < NUM_INPUT_BUS_WORDS; i++)
545 626 input_bus[i] = 0xff;
546 627  
547   - /* Our counters count up, and "count" refers to
  628 + /*
  629 + * Our counters count up, and "count" refers to
548 630 * how much before the next interrupt, and we interrupt
549   - * on overflow. So we calculate the starting value
  631 + * on overflow. So we calculate the starting value
550 632 * which will give us "count" until overflow.
551 633 * Then we set the events on the enabled counters.
552 634 */
553 635  
554 636  
555 637  
556 638  
557 639  
... ... @@ -569,29 +651,28 @@
569 651 for (i = 0; i < num_counters; ++i) {
570 652 per_cpu(pmc_values, cpu)[i] = reset_value[i];
571 653 }
572   -out:
573   - ;
  654 +
  655 + return 0;
574 656 }
575 657  
  658 +
  659 +
576 660 /* This function is called once for each cpu */
577   -static void cell_cpu_setup(struct op_counter_config *cntr)
  661 +static int cell_cpu_setup(struct op_counter_config *cntr)
578 662 {
579 663 u32 cpu = smp_processor_id();
580 664 u32 num_enabled = 0;
581 665 int i;
582 666  
  667 + if (spu_cycle_reset)
  668 + return 0;
  669 +
583 670 /* There is one performance monitor per processor chip (i.e. node),
584 671 * so we only need to perform this function once per node.
585 672 */
586 673 if (cbe_get_hw_thread_id(cpu))
587   - goto out;
  674 + return 0;
588 675  
589   - if (pm_rtas_token == RTAS_UNKNOWN_SERVICE) {
590   - printk(KERN_WARNING "%s: RTAS_UNKNOWN_SERVICE\n",
591   - __FUNCTION__);
592   - goto out;
593   - }
594   -
595 676 /* Stop all counters */
596 677 cbe_disable_pm(cpu);
597 678 cbe_disable_pm_interrupts(cpu);
598 679  
599 680  
600 681  
601 682  
... ... @@ -609,16 +690,286 @@
609 690 }
610 691 }
611 692  
612   - pm_rtas_activate_signals(cbe_cpu_to_node(cpu), num_enabled);
  693 + /*
  694 + * pm_rtas_activate_signals() returns -EIO if the FW
  695 + * call fails.
  696 + */
  697 + return pm_rtas_activate_signals(cbe_cpu_to_node(cpu), num_enabled);
  698 +}
  699 +
  700 +#define ENTRIES 303
  701 +#define MAXLFSR 0xFFFFFF
  702 +
  703 +/* precomputed table of 24 bit LFSR values */
  704 +static int initial_lfsr[] = {
  705 + 8221349, 12579195, 5379618, 10097839, 7512963, 7519310, 3955098, 10753424,
  706 + 15507573, 7458917, 285419, 2641121, 9780088, 3915503, 6668768, 1548716,
  707 + 4885000, 8774424, 9650099, 2044357, 2304411, 9326253, 10332526, 4421547,
  708 + 3440748, 10179459, 13332843, 10375561, 1313462, 8375100, 5198480, 6071392,
  709 + 9341783, 1526887, 3985002, 1439429, 13923762, 7010104, 11969769, 4547026,
  710 + 2040072, 4025602, 3437678, 7939992, 11444177, 4496094, 9803157, 10745556,
  711 + 3671780, 4257846, 5662259, 13196905, 3237343, 12077182, 16222879, 7587769,
  712 + 14706824, 2184640, 12591135, 10420257, 7406075, 3648978, 11042541, 15906893,
  713 + 11914928, 4732944, 10695697, 12928164, 11980531, 4430912, 11939291, 2917017,
  714 + 6119256, 4172004, 9373765, 8410071, 14788383, 5047459, 5474428, 1737756,
  715 + 15967514, 13351758, 6691285, 8034329, 2856544, 14394753, 11310160, 12149558,
  716 + 7487528, 7542781, 15668898, 12525138, 12790975, 3707933, 9106617, 1965401,
  717 + 16219109, 12801644, 2443203, 4909502, 8762329, 3120803, 6360315, 9309720,
  718 + 15164599, 10844842, 4456529, 6667610, 14924259, 884312, 6234963, 3326042,
  719 + 15973422, 13919464, 5272099, 6414643, 3909029, 2764324, 5237926, 4774955,
  720 + 10445906, 4955302, 5203726, 10798229, 11443419, 2303395, 333836, 9646934,
  721 + 3464726, 4159182, 568492, 995747, 10318756, 13299332, 4836017, 8237783,
  722 + 3878992, 2581665, 11394667, 5672745, 14412947, 3159169, 9094251, 16467278,
  723 + 8671392, 15230076, 4843545, 7009238, 15504095, 1494895, 9627886, 14485051,
  724 + 8304291, 252817, 12421642, 16085736, 4774072, 2456177, 4160695, 15409741,
  725 + 4902868, 5793091, 13162925, 16039714, 782255, 11347835, 14884586, 366972,
  726 + 16308990, 11913488, 13390465, 2958444, 10340278, 1177858, 1319431, 10426302,
  727 + 2868597, 126119, 5784857, 5245324, 10903900, 16436004, 3389013, 1742384,
  728 + 14674502, 10279218, 8536112, 10364279, 6877778, 14051163, 1025130, 6072469,
  729 + 1988305, 8354440, 8216060, 16342977, 13112639, 3976679, 5913576, 8816697,
  730 + 6879995, 14043764, 3339515, 9364420, 15808858, 12261651, 2141560, 5636398,
  731 + 10345425, 10414756, 781725, 6155650, 4746914, 5078683, 7469001, 6799140,
  732 + 10156444, 9667150, 10116470, 4133858, 2121972, 1124204, 1003577, 1611214,
  733 + 14304602, 16221850, 13878465, 13577744, 3629235, 8772583, 10881308, 2410386,
  734 + 7300044, 5378855, 9301235, 12755149, 4977682, 8083074, 10327581, 6395087,
  735 + 9155434, 15501696, 7514362, 14520507, 15808945, 3244584, 4741962, 9658130,
  736 + 14336147, 8654727, 7969093, 15759799, 14029445, 5038459, 9894848, 8659300,
  737 + 13699287, 8834306, 10712885, 14753895, 10410465, 3373251, 309501, 9561475,
  738 + 5526688, 14647426, 14209836, 5339224, 207299, 14069911, 8722990, 2290950,
  739 + 3258216, 12505185, 6007317, 9218111, 14661019, 10537428, 11731949, 9027003,
  740 + 6641507, 9490160, 200241, 9720425, 16277895, 10816638, 1554761, 10431375,
  741 + 7467528, 6790302, 3429078, 14633753, 14428997, 11463204, 3576212, 2003426,
  742 + 6123687, 820520, 9992513, 15784513, 5778891, 6428165, 8388607
  743 +};
  744 +
  745 +/*
  746 + * The hardware uses an LFSR counting sequence to determine when to capture
  747 + * the SPU PCs. An LFSR sequence is like a pseudo-random number sequence
  748 + * where each number occurs once in the sequence but the sequence is not in
  749 + * numerical order. The SPU PC capture is done when the LFSR sequence reaches
  750 + * the last value in the sequence. Hence the user specified value N
  751 + * corresponds to the LFSR number that is N from the end of the sequence.
  752 + *
  753 + * To avoid the time to compute the LFSR, a lookup table is used. The 24 bit
  754 + * LFSR sequence is broken into four ranges. The spacing of the precomputed
  755 + * values is adjusted in each range so the error between the user specified
  756 + * number (N) of events between samples and the actual number of events based
  757 + * on the precomputed value will be less than about 6.2%. Note: if the user
  758 + * specifies N < 2^16, the LFSR value that is 2^16 from the end will be used.
  759 + * This is to prevent the loss of samples because the trace buffer is full.
  760 + *
  761 + *   User specified N                 Step between          Index in
  762 + *                                 precomputed values     precomputed
  763 + *                                                           table
  764 + *   0              to 2^16-1                ----               0
  765 + *   2^16           to 2^16+2^19-1           2^12          1 to 128
  766 + *   2^16+2^19      to 2^16+2^19+2^22-1      2^15        129 to 256
  767 + *   2^16+2^19+2^22 to 2^24-1                2^18        257 to 302
  768 + *
  769 + *
  770 + * For example, the LFSR values in the second range are computed for 2^16,
  771 + * 2^16+2^12, ... , 2^19-2^16, 2^19 and stored in the table at indices
  772 + * 1, 2,..., 127, 128.
  773 + *
  774 + * The 24 bit LFSR value for the nth number in the sequence can be
  775 + * calculated using the following code:
  776 + *
  777 + * #define size 24
  778 + * int calculate_lfsr(int n)
  779 + * {
  780 + * int i;
  781 + * unsigned int newlfsr0;
  782 + * unsigned int lfsr = 0xFFFFFF;
  783 + * unsigned int howmany = n;
  784 + *
  785 + * for (i = 2; i < howmany + 2; i++) {
  786 + * newlfsr0 = (((lfsr >> (size - 1 - 0)) & 1) ^
  787 + * ((lfsr >> (size - 1 - 1)) & 1) ^
  788 + * (((lfsr >> (size - 1 - 6)) & 1) ^
  789 + * ((lfsr >> (size - 1 - 23)) & 1)));
  790 + *
  791 + * lfsr >>= 1;
  792 + * lfsr = lfsr | (newlfsr0 << (size - 1));
  793 + * }
  794 + * return lfsr;
  795 + * }
  796 + */
  797 +
  798 +#define V2_16 (0x1 << 16)
  799 +#define V2_19 (0x1 << 19)
  800 +#define V2_22 (0x1 << 22)
  801 +
  802 +static int calculate_lfsr(int n)
  803 +{
  804 + /*
  805 + * The ranges and steps are in powers of 2 so the calculations
  806 + * can be done using shifts rather than divide.
  807 + */
  808 + int index;
  809 +
  810 + if ((n >> 16) == 0)
  811 + index = 0;
  812 + else if (((n - V2_16) >> 19) == 0)
  813 + index = ((n - V2_16) >> 12) + 1;
  814 + else if (((n - V2_16 - V2_19) >> 22) == 0)
  815 + index = ((n - V2_16 - V2_19) >> 15) + 1 + 128;
  816 + else if (((n - V2_16 - V2_19 - V2_22) >> 24) == 0)
  817 + index = ((n - V2_16 - V2_19 - V2_22) >> 18) + 1 + 256;
  818 + else
  819 + index = ENTRIES-1;
  820 +
  821 + /* make sure index is valid */
  822 + if ((index >= ENTRIES) || (index < 0))
  823 + index = ENTRIES-1;
  824 +
  825 + return initial_lfsr[index];
  826 +}
  827 +
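
To sanity-check the precomputed table, the reference recurrence from the comment
block above can be run in userspace. Below is a minimal standalone sketch;
gen_lfsr() is our name for the comment's calculate_lfsr(), and the expected
index-to-N correspondence is our reading of the range table, not something the
patch asserts.

#include <stdio.h>

#define SIZE 24

/* mirrors the reference calculate_lfsr() shown in the comment above */
static unsigned int gen_lfsr(int n)
{
	unsigned int lfsr = 0xFFFFFF;	/* all-ones starting state */
	unsigned int newbit;
	int i;

	for (i = 2; i < n + 2; i++) {
		newbit = ((lfsr >> (SIZE - 1 - 0)) & 1) ^
			 ((lfsr >> (SIZE - 1 - 1)) & 1) ^
			 ((lfsr >> (SIZE - 1 - 6)) & 1) ^
			 ((lfsr >> (SIZE - 1 - 23)) & 1);
		lfsr = (lfsr >> 1) | (newbit << (SIZE - 1));
	}
	return lfsr;
}

int main(void)
{
	/* per the range table, index 1 should correspond to N = 2^16
	 * and index 2 to N = 2^16 + 2^12 (our reading)
	 */
	printf("N=2^16      -> %u\n", gen_lfsr(1 << 16));
	printf("N=2^16+2^12 -> %u\n", gen_lfsr((1 << 16) + (1 << 12)));
	return 0;
}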
  828 +static int pm_rtas_activate_spu_profiling(u32 node)
  829 +{
  830 + int ret, i;
  831 + struct pm_signal pm_signal_local[NR_PHYS_CTRS];
  832 +
  833 + /*
  834 + * Set up the rtas call to configure the debug bus to
  835 + * route the SPU PCs. Set up the pm_signal for each SPU.
  836 + */
  837 + for (i = 0; i < NUM_SPUS_PER_NODE; i++) {
  838 + pm_signal_local[i].cpu = node;
  839 + pm_signal_local[i].signal_group = 41;
  840 + /* spu i on word (i/2) */
  841 + pm_signal_local[i].bus_word = 1 << i / 2;
  842 + /* spu i */
  843 + pm_signal_local[i].sub_unit = i;
  844 + pm_signal_local[i].bit = 63;
  845 + }
  846 +
  847 + ret = rtas_ibm_cbe_perftools(SUBFUNC_ACTIVATE,
  848 + PASSTHRU_ENABLE, pm_signal_local,
  849 + (NUM_SPUS_PER_NODE
  850 + * sizeof(struct pm_signal)));
  851 +
  852 + if (unlikely(ret)) {
  853 + printk(KERN_WARNING "%s: rtas returned: %d\n",
  854 + __FUNCTION__, ret);
  855 + return -EIO;
  856 + }
  857 +
  858 + return 0;
  859 +}
  860 +
  861 +#ifdef CONFIG_CPU_FREQ
  862 +static int
  863 +oprof_cpufreq_notify(struct notifier_block *nb, unsigned long val, void *data)
  864 +{
  865 + int ret = 0;
  866 + struct cpufreq_freqs *frq = data;
  867 + if ((val == CPUFREQ_PRECHANGE && frq->old < frq->new) ||
  868 + (val == CPUFREQ_POSTCHANGE && frq->old > frq->new) ||
  869 + (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE))
  870 + set_spu_profiling_frequency(frq->new, spu_cycle_reset);
  871 + return ret;
  872 +}
  873 +
  874 +static struct notifier_block cpu_freq_notifier_block = {
  875 + .notifier_call = oprof_cpufreq_notify
  876 +};
  877 +#endif
  878 +
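The asymmetry in oprof_cpufreq_notify() above -- update on PRECHANGE only when
the clock is rising, on POSTCHANGE only when it is falling -- deserves a note.
Our reading (not a claim made by the patch) is that this keeps the frequency the
profiler believes in at or above the actual clock throughout a transition, so
the sampling period is never stretched. A userspace simulation of just the
update rule, with hypothetical names:

#include <stdio.h>

enum phase { PRECHANGE, POSTCHANGE };

static unsigned int profiled_khz;	/* what the profiler believes */

static void notify(enum phase val, unsigned int old, unsigned int new)
{
	/* same condition structure as oprof_cpufreq_notify() */
	if ((val == PRECHANGE && old < new) ||
	    (val == POSTCHANGE && old > new))
		profiled_khz = new;
}

int main(void)
{
	profiled_khz = 2000000;				/* 2 GHz */

	notify(PRECHANGE, 2000000, 3000000);		/* raise: early */
	printf("mid-raise:  %u\n", profiled_khz);	/* 3000000 */
	notify(POSTCHANGE, 2000000, 3000000);

	notify(PRECHANGE, 3000000, 1000000);		/* drop: not yet */
	printf("mid-drop:   %u\n", profiled_khz);	/* still 3000000 */
	notify(POSTCHANGE, 3000000, 1000000);		/* drop: late */
	printf("after-drop: %u\n", profiled_khz);	/* 1000000 */
	return 0;
}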
  879 +static int cell_global_start_spu(struct op_counter_config *ctr)
  880 +{
  881 + int subfunc;
  882 + unsigned int lfsr_value;
  883 + int cpu;
  884 + int ret;
  885 + int rtas_error;
  886 + unsigned int cpu_khzfreq = 0;
  887 +
  888 + /* SPU profiling is time-based and derived from the cpu
  889 + * frequency, so if configured with the CPU_FREQ
  890 + * option, we should detect frequency changes and react
  891 + * accordingly.
  892 + */
  893 +#ifdef CONFIG_CPU_FREQ
  894 + ret = cpufreq_register_notifier(&cpu_freq_notifier_block,
  895 + CPUFREQ_TRANSITION_NOTIFIER);
  896 + if (ret < 0)
  897 + /* this is not a fatal error */
  898 + printk(KERN_ERR "CPU freq change registration failed: %d\n",
  899 + ret);
  900 +
  901 + else
  902 + cpu_khzfreq = cpufreq_quick_get(smp_processor_id());
  903 +#endif
  904 +
  905 + set_spu_profiling_frequency(cpu_khzfreq, spu_cycle_reset);
  906 +
  907 + for_each_online_cpu(cpu) {
  908 + if (cbe_get_hw_thread_id(cpu))
  909 + continue;
  910 +
  911 + /*
  912 + * Set up SPU cycle-based profiling.
  913 + * Set perf_mon_control bit 0 to a zero before
  914 + * enabling spu collection hardware.
  915 + */
  916 + cbe_write_pm(cpu, pm_control, 0);
  917 +
  918 + if (spu_cycle_reset > MAX_SPU_COUNT)
  919 + /* use largest possible value */
  920 + lfsr_value = calculate_lfsr(MAX_SPU_COUNT-1);
  921 + else
  922 + lfsr_value = calculate_lfsr(spu_cycle_reset);
  923 +
  924 + /* Must use a non-zero value; zero disables data collection. */
  925 + if (lfsr_value == 0)
  926 + lfsr_value = calculate_lfsr(1);
  927 +
  928 + lfsr_value = lfsr_value << 8; /* shift lfsr to correct
  929 + * register location
  930 + */
  931 +
  932 + /* debug bus setup */
  933 + ret = pm_rtas_activate_spu_profiling(cbe_cpu_to_node(cpu));
  934 +
  935 + if (unlikely(ret)) {
  936 + rtas_error = ret;
  937 + goto out;
  938 + }
  939 +
  940 +
  941 + subfunc = 2; /* 2 - activate SPU tracing, 3 - deactivate */
  942 +
  943 + /* start profiling */
  944 + ret = rtas_call(spu_rtas_token, 3, 1, NULL, subfunc,
  945 + cbe_cpu_to_node(cpu), lfsr_value);
  946 +
  947 + if (unlikely(ret != 0)) {
  948 + printk(KERN_ERR
  949 + "%s: rtas call ibm,cbe-spu-perftools failed, return = %d\n",
  950 + __FUNCTION__, ret);
  951 + rtas_error = -EIO;
  952 + goto out;
  953 + }
  954 + }
  955 +
  956 + rtas_error = start_spu_profiling(spu_cycle_reset);
  957 + if (rtas_error)
  958 + goto out_stop;
  959 +
  960 + oprofile_running = 1;
  961 + return 0;
  962 +
  963 +out_stop:
  964 + cell_global_stop_spu(); /* clean up the PMU/debug bus */
613 965 out:
614   - ;
  966 + return rtas_error;
615 967 }
616 968  
617   -static void cell_global_start(struct op_counter_config *ctr)
  969 +static int cell_global_start_ppu(struct op_counter_config *ctr)
618 970 {
619   - u32 cpu;
  971 + u32 cpu, i;
620 972 u32 interrupt_mask = 0;
621   - u32 i;
622 973  
623 974 /* This routine gets called once for the system.
624 975 * There is one performance monitor per node, so we
625 976  
626 977  
627 978  
628 979  
... ... @@ -651,19 +1002,79 @@
651 1002 oprofile_running = 1;
652 1003 smp_wmb();
653 1004  
654   - /* NOTE: start_virt_cntrs will result in cell_virtual_cntr() being
655   - * executed which manipulates the PMU. We start the "virtual counter"
  1005 + /*
  1006 + * NOTE: start_virt_cntrs will result in cell_virtual_cntr() being
  1007 + * executed which manipulates the PMU. We start the "virtual counter"
656 1008 * here so that we do not need to synchronize access to the PMU in
657 1009 * the above for-loop.
658 1010 */
659 1011 start_virt_cntrs();
  1012 +
  1013 + return 0;
660 1014 }
661 1015  
662   -static void cell_global_stop(void)
  1016 +static int cell_global_start(struct op_counter_config *ctr)
663 1017 {
  1018 + if (spu_cycle_reset)
  1019 + return cell_global_start_spu(ctr);
  1020 + else
  1021 + return cell_global_start_ppu(ctr);
  1022 +}
  1023 +
  1024 +/*
  1025 + * Note the generic OProfile stop calls do not support returning
  1026 + * an error on stop. Hence, we do not return an error if the FW
  1027 + * calls fail on stop. Failure to reset the debug bus is not an issue.
  1028 + * Failure to disable the SPU profiling is not an issue. The FW calls
  1029 + * to enable the performance counters and debug bus will work even if
  1030 + * the hardware was not cleanly reset.
  1031 + */
  1032 +static void cell_global_stop_spu(void)
  1033 +{
  1034 + int subfunc, rtn_value;
  1035 + unsigned int lfsr_value;
664 1036 int cpu;
665 1037  
666   - /* This routine will be called once for the system.
  1038 + oprofile_running = 0;
  1039 +
  1040 +#ifdef CONFIG_CPU_FREQ
  1041 + cpufreq_unregister_notifier(&cpu_freq_notifier_block,
  1042 + CPUFREQ_TRANSITION_NOTIFIER);
  1043 +#endif
  1044 +
  1045 + for_each_online_cpu(cpu) {
  1046 + if (cbe_get_hw_thread_id(cpu))
  1047 + continue;
  1048 +
  1049 + subfunc = 3; /*
  1050 + * 2 - activate SPU tracing,
  1051 + * 3 - deactivate
  1052 + */
  1053 + lfsr_value = 0x8f100000;
  1054 +
  1055 + rtn_value = rtas_call(spu_rtas_token, 3, 1, NULL,
  1056 + subfunc, cbe_cpu_to_node(cpu),
  1057 + lfsr_value);
  1058 +
  1059 + if (unlikely(rtn_value != 0)) {
  1060 + printk(KERN_ERR
  1061 + "%s: rtas call ibm,cbe-spu-perftools failed, return = %d\n",
  1062 + __FUNCTION__, rtn_value);
  1063 + }
  1064 +
  1065 + /* Deactivate the signals */
  1066 + pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
  1067 + }
  1068 +
  1069 + stop_spu_profiling();
  1070 +}
  1071 +
  1072 +static void cell_global_stop_ppu(void)
  1073 +{
  1074 + int cpu;
  1075 +
  1076 + /*
  1077 + * This routine will be called once for the system.
667 1078 * There is one performance monitor per node, so we
668 1079 * only need to perform this function once per node.
669 1080 */
670 1081  
... ... @@ -687,9 +1098,17 @@
687 1098 }
688 1099 }
689 1100  
690   -static void
691   -cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr)
  1101 +static void cell_global_stop(void)
692 1102 {
  1103 + if (spu_cycle_reset)
  1104 + cell_global_stop_spu();
  1105 + else
  1106 + cell_global_stop_ppu();
  1107 +}
  1108 +
  1109 +static void cell_handle_interrupt(struct pt_regs *regs,
  1110 + struct op_counter_config *ctr)
  1111 +{
693 1112 u32 cpu;
694 1113 u64 pc;
695 1114 int is_kernel;
696 1115  
... ... @@ -699,13 +1118,15 @@
699 1118  
700 1119 cpu = smp_processor_id();
701 1120  
702   - /* Need to make sure the interrupt handler and the virt counter
  1121 + /*
  1122 + * Need to make sure the interrupt handler and the virt counter
703 1123 * routine are not running at the same time. See the
704 1124 * cell_virtual_cntr() routine for additional comments.
705 1125 */
706 1126 spin_lock_irqsave(&virt_cntr_lock, flags);
707 1127  
708   - /* Need to disable and reenable the performance counters
  1128 + /*
  1129 + * Need to disable and reenable the performance counters
709 1130 * to get the desired behavior from the hardware. This
710 1131 * is hardware specific.
711 1132 */
... ... @@ -714,7 +1135,8 @@
714 1135  
715 1136 interrupt_mask = cbe_get_and_clear_pm_interrupts(cpu);
716 1137  
717   - /* If the interrupt mask has been cleared, then the virt cntr
  1138 + /*
  1139 + * If the interrupt mask has been cleared, then the virt cntr
718 1140 * has cleared the interrupt. When the thread that generated
719 1141 * the interrupt is restored, the data count will be restored to
720 1142 * 0xfffffff0 to cause the interrupt to be regenerated.
721 1143  
722 1144  
... ... @@ -732,18 +1154,20 @@
732 1154 }
733 1155 }
734 1156  
735   - /* The counters were frozen by the interrupt.
  1157 + /*
  1158 + * The counters were frozen by the interrupt.
736 1159 * Reenable the interrupt and restart the counters.
737 1160 * If there was a race between the interrupt handler and
738   - * the virtual counter routine. The virutal counter
  1161 + * the virtual counter routine, the virtual counter
739 1162 * routine may have cleared the interrupts. Hence must
740 1163 * use the virt_cntr_inter_mask to re-enable the interrupts.
741 1164 */
742 1165 cbe_enable_pm_interrupts(cpu, hdw_thread,
743 1166 virt_cntr_inter_mask);
744 1167  
745   - /* The writes to the various performance counters only writes
746   - * to a latch. The new values (interrupt setting bits, reset
  1168 + /*
  1169 + * The writes to the various performance counters only writes
  1170 + * to a latch. The new values (interrupt setting bits, reset
747 1171 * counter value etc.) are not copied to the actual registers
748 1172 * until the performance monitor is enabled. In order to get
749 1173 * this to work as desired, the performance monitor needs to
750 1174  
... ... @@ -755,11 +1179,34 @@
755 1179 spin_unlock_irqrestore(&virt_cntr_lock, flags);
756 1180 }
757 1181  
  1182 +/*
  1183 + * This function is called from the generic OProfile
  1184 + * driver. When profiling PPUs, we need to do the
  1185 + * generic sync start; otherwise, do spu_sync_start.
  1186 + */
  1187 +static int cell_sync_start(void)
  1188 +{
  1189 + if (spu_cycle_reset)
  1190 + return spu_sync_start();
  1191 + else
  1192 + return DO_GENERIC_SYNC;
  1193 +}
  1194 +
  1195 +static int cell_sync_stop(void)
  1196 +{
  1197 + if (spu_cycle_reset)
  1198 + return spu_sync_stop();
  1199 + else
  1200 + return 1;
  1201 +}
  1202 +
758 1203 struct op_powerpc_model op_model_cell = {
759 1204 .reg_setup = cell_reg_setup,
760 1205 .cpu_setup = cell_cpu_setup,
761 1206 .global_start = cell_global_start,
762 1207 .global_stop = cell_global_stop,
  1208 + .sync_start = cell_sync_start,
  1209 + .sync_stop = cell_sync_stop,
763 1210 .handle_interrupt = cell_handle_interrupt,
764 1211 };
arch/powerpc/oprofile/op_model_fsl_booke.c
... ... @@ -244,7 +244,7 @@
244 244 mfpmr(PMRN_PMLCA3), mfpmr(PMRN_PMLCB3));
245 245 }
246 246  
247   -static void fsl_booke_cpu_setup(struct op_counter_config *ctr)
  247 +static int fsl_booke_cpu_setup(struct op_counter_config *ctr)
248 248 {
249 249 int i;
250 250  
251 251  
... ... @@ -258,9 +258,11 @@
258 258  
259 259 set_pmc_user_kernel(i, ctr[i].user, ctr[i].kernel);
260 260 }
  261 +
  262 + return 0;
261 263 }
262 264  
263   -static void fsl_booke_reg_setup(struct op_counter_config *ctr,
  265 +static int fsl_booke_reg_setup(struct op_counter_config *ctr,
264 266 struct op_system_config *sys,
265 267 int num_ctrs)
266 268 {
267 269  
... ... @@ -276,9 +278,10 @@
276 278 for (i = 0; i < num_counters; ++i)
277 279 reset_value[i] = 0x80000000UL - ctr[i].count;
278 280  
  281 + return 0;
279 282 }
280 283  
281   -static void fsl_booke_start(struct op_counter_config *ctr)
  284 +static int fsl_booke_start(struct op_counter_config *ctr)
282 285 {
283 286 int i;
284 287  
... ... @@ -308,6 +311,8 @@
308 311  
309 312 pr_debug("start on cpu %d, pmgc0 %x\n", smp_processor_id(),
310 313 mfpmr(PMRN_PMGC0));
  314 +
  315 + return 0;
311 316 }
312 317  
313 318 static void fsl_booke_stop(void)
arch/powerpc/oprofile/op_model_pa6t.c
... ... @@ -89,7 +89,7 @@
89 89  
90 90  
91 91 /* precompute the values to stuff in the hardware registers */
92   -static void pa6t_reg_setup(struct op_counter_config *ctr,
  92 +static int pa6t_reg_setup(struct op_counter_config *ctr,
93 93 struct op_system_config *sys,
94 94 int num_ctrs)
95 95 {
96 96  
... ... @@ -135,10 +135,12 @@
135 135 pr_debug("reset_value for pmc%u inited to 0x%lx\n",
136 136 pmc, reset_value[pmc]);
137 137 }
  138 +
  139 + return 0;
138 140 }
139 141  
140 142 /* configure registers on this cpu */
141   -static void pa6t_cpu_setup(struct op_counter_config *ctr)
  143 +static int pa6t_cpu_setup(struct op_counter_config *ctr)
142 144 {
143 145 u64 mmcr0 = mmcr0_val;
144 146 u64 mmcr1 = mmcr1_val;
145 147  
... ... @@ -154,9 +156,11 @@
154 156 mfspr(SPRN_PA6T_MMCR0));
155 157 pr_debug("setup on cpu %d, mmcr1 %016lx\n", smp_processor_id(),
156 158 mfspr(SPRN_PA6T_MMCR1));
  159 +
  160 + return 0;
157 161 }
158 162  
159   -static void pa6t_start(struct op_counter_config *ctr)
  163 +static int pa6t_start(struct op_counter_config *ctr)
160 164 {
161 165 int i;
162 166  
... ... @@ -174,6 +178,8 @@
174 178 oprofile_running = 1;
175 179  
176 180 pr_debug("start on cpu %d, mmcr0 %lx\n", smp_processor_id(), mmcr0);
  181 +
  182 + return 0;
177 183 }
178 184  
179 185 static void pa6t_stop(void)
arch/powerpc/oprofile/op_model_power4.c
... ... @@ -32,7 +32,7 @@
32 32 static u64 mmcr1_val;
33 33 static u64 mmcra_val;
34 34  
35   -static void power4_reg_setup(struct op_counter_config *ctr,
  35 +static int power4_reg_setup(struct op_counter_config *ctr,
36 36 struct op_system_config *sys,
37 37 int num_ctrs)
38 38 {
... ... @@ -60,6 +60,8 @@
60 60 mmcr0_val &= ~MMCR0_PROBLEM_DISABLE;
61 61 else
62 62 mmcr0_val |= MMCR0_PROBLEM_DISABLE;
  63 +
  64 + return 0;
63 65 }
64 66  
65 67 extern void ppc64_enable_pmcs(void);
... ... @@ -84,7 +86,7 @@
84 86 return 0;
85 87 }
86 88  
87   -static void power4_cpu_setup(struct op_counter_config *ctr)
  89 +static int power4_cpu_setup(struct op_counter_config *ctr)
88 90 {
89 91 unsigned int mmcr0 = mmcr0_val;
90 92 unsigned long mmcra = mmcra_val;
91 93  
... ... @@ -111,9 +113,11 @@
111 113 mfspr(SPRN_MMCR1));
112 114 dbg("setup on cpu %d, mmcra %lx\n", smp_processor_id(),
113 115 mfspr(SPRN_MMCRA));
  116 +
  117 + return 0;
114 118 }
115 119  
116   -static void power4_start(struct op_counter_config *ctr)
  120 +static int power4_start(struct op_counter_config *ctr)
117 121 {
118 122 int i;
119 123 unsigned int mmcr0;
... ... @@ -148,6 +152,7 @@
148 152 oprofile_running = 1;
149 153  
150 154 dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0);
  155 + return 0;
151 156 }
152 157  
153 158 static void power4_stop(void)
arch/powerpc/oprofile/op_model_rs64.c
... ... @@ -88,7 +88,7 @@
88 88  
89 89 static int num_counters;
90 90  
91   -static void rs64_reg_setup(struct op_counter_config *ctr,
  91 +static int rs64_reg_setup(struct op_counter_config *ctr,
92 92 struct op_system_config *sys,
93 93 int num_ctrs)
94 94 {
95 95  
... ... @@ -100,9 +100,10 @@
100 100 reset_value[i] = 0x80000000UL - ctr[i].count;
101 101  
102 102 /* XXX setup user and kernel profiling */
  103 + return 0;
103 104 }
104 105  
105   -static void rs64_cpu_setup(struct op_counter_config *ctr)
  106 +static int rs64_cpu_setup(struct op_counter_config *ctr)
106 107 {
107 108 unsigned int mmcr0;
108 109  
109 110  
... ... @@ -125,9 +126,11 @@
125 126 mfspr(SPRN_MMCR0));
126 127 dbg("setup on cpu %d, mmcr1 %lx\n", smp_processor_id(),
127 128 mfspr(SPRN_MMCR1));
  129 +
  130 + return 0;
128 131 }
129 132  
130   -static void rs64_start(struct op_counter_config *ctr)
  133 +static int rs64_start(struct op_counter_config *ctr)
131 134 {
132 135 int i;
133 136 unsigned int mmcr0;
... ... @@ -155,6 +158,7 @@
155 158 mtspr(SPRN_MMCR0, mmcr0);
156 159  
157 160 dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0);
  161 + return 0;
158 162 }
159 163  
160 164 static void rs64_stop(void)
arch/powerpc/platforms/cell/spufs/context.c
... ... @@ -22,6 +22,7 @@
22 22  
23 23 #include <linux/fs.h>
24 24 #include <linux/mm.h>
  25 +#include <linux/module.h>
25 26 #include <linux/slab.h>
26 27 #include <asm/atomic.h>
27 28 #include <asm/spu.h>
... ... @@ -81,6 +82,8 @@
81 82 spu_fini_csa(&ctx->csa);
82 83 if (ctx->gang)
83 84 spu_gang_remove_ctx(ctx->gang, ctx);
  85 + if (ctx->prof_priv_kref)
  86 + kref_put(ctx->prof_priv_kref, ctx->prof_priv_release);
84 87 BUG_ON(!list_empty(&ctx->rq));
85 88 atomic_dec(&nr_spu_contexts);
86 89 kfree(ctx);
... ... @@ -185,4 +188,19 @@
185 188  
186 189 spu_release(ctx);
187 190 }
  191 +
  192 +void spu_set_profile_private_kref(struct spu_context *ctx,
  193 + struct kref *prof_info_kref,
  194 + void ( * prof_info_release) (struct kref *kref))
  195 +{
  196 + ctx->prof_priv_kref = prof_info_kref;
  197 + ctx->prof_priv_release = prof_info_release;
  198 +}
  199 +EXPORT_SYMBOL_GPL(spu_set_profile_private_kref);
  200 +
  201 +void *spu_get_profile_private_kref(struct spu_context *ctx)
  202 +{
  203 + return ctx->prof_priv_kref;
  204 +}
  205 +EXPORT_SYMBOL_GPL(spu_get_profile_private_kref);
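
A hedged sketch of how a profiler is expected to use this pair of exports;
struct my_prof_info, my_prof_info_release() and attach_my_info() are
hypothetical names, and we assume the spufs definition of struct spu_context
is in scope:

#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/slab.h>
#include <asm/spu.h>

struct my_prof_info {
	struct kref kref;		/* embedded refcount */
	void *cached_map;		/* e.g. a vma-to-file-offset map */
};

static void my_prof_info_release(struct kref *kref)
{
	kfree(container_of(kref, struct my_prof_info, kref));
}

static struct my_prof_info *attach_my_info(struct spu_context *ctx)
{
	struct kref *old = spu_get_profile_private_kref(ctx);
	struct my_prof_info *info;

	if (old)	/* cache survived a save/restore; reuse it */
		return container_of(old, struct my_prof_info, kref);

	info = kzalloc(sizeof(*info), GFP_KERNEL);
	if (!info)
		return NULL;

	kref_init(&info->kref);		/* the profiler's reference */
	kref_get(&info->kref);		/* the reference spufs will drop */
	spu_set_profile_private_kref(ctx, &info->kref,
				     my_prof_info_release);
	return info;
}

When the profiler itself is finished with the data it drops its own reference
with kref_put(); the reference handed to spufs is the one destroy_spu_context()
releases above.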
arch/powerpc/platforms/cell/spufs/sched.c
... ... @@ -274,6 +274,7 @@
274 274 ctx->spu = spu;
275 275 ctx->ops = &spu_hw_ops;
276 276 spu->pid = current->pid;
  277 + spu->tgid = current->tgid;
277 278 spu_associate_mm(spu, ctx->owner);
278 279 spu->ibox_callback = spufs_ibox_callback;
279 280 spu->wbox_callback = spufs_wbox_callback;
... ... @@ -456,6 +457,7 @@
456 457 spu->dma_callback = NULL;
457 458 spu_associate_mm(spu, NULL);
458 459 spu->pid = 0;
  460 + spu->tgid = 0;
459 461 ctx->ops = &spu_backing_ops;
460 462 spu->flags = 0;
461 463 spu->ctx = NULL;
... ... @@ -737,7 +739,7 @@
737 739 }
738 740  
739 741 /**
740   - * spu_yield - yield a physical spu if others are waiting
  742 + * spu_yield - yield a physical spu if others are waiting
741 743 * @ctx: spu context to yield
742 744 *
743 745 * Check if there is a higher priority context waiting and if yes
arch/powerpc/platforms/cell/spufs/spufs.h
... ... @@ -85,6 +85,8 @@
85 85  
86 86 struct list_head gang_list;
87 87 struct spu_gang *gang;
  88 + struct kref *prof_priv_kref;
  89 + void ( * prof_priv_release) (struct kref *kref);
88 90  
89 91 /* owner thread */
90 92 pid_t tid;
drivers/oprofile/buffer_sync.c
... ... @@ -26,8 +26,9 @@
26 26 #include <linux/profile.h>
27 27 #include <linux/module.h>
28 28 #include <linux/fs.h>
  29 +#include <linux/oprofile.h>
29 30 #include <linux/sched.h>
30   -
  31 +
31 32 #include "oprofile_stats.h"
32 33 #include "event_buffer.h"
33 34 #include "cpu_buffer.h"
drivers/oprofile/event_buffer.h
... ... @@ -19,28 +19,10 @@
19 19  
20 20 /* wake up the process sleeping on the event file */
21 21 void wake_up_buffer_waiter(void);
22   -
23   -/* Each escaped entry is prefixed by ESCAPE_CODE
24   - * then one of the following codes, then the
25   - * relevant data.
26   - */
27   -#define ESCAPE_CODE ~0UL
28   -#define CTX_SWITCH_CODE 1
29   -#define CPU_SWITCH_CODE 2
30   -#define COOKIE_SWITCH_CODE 3
31   -#define KERNEL_ENTER_SWITCH_CODE 4
32   -#define KERNEL_EXIT_SWITCH_CODE 5
33   -#define MODULE_LOADED_CODE 6
34   -#define CTX_TGID_CODE 7
35   -#define TRACE_BEGIN_CODE 8
36   -#define TRACE_END_CODE 9
37   -
  22 +
38 23 #define INVALID_COOKIE ~0UL
39 24 #define NO_COOKIE 0UL
40 25  
41   -/* add data to the event buffer */
42   -void add_event_entry(unsigned long data);
43   -
44 26 extern const struct file_operations event_buffer_fops;
45 27  
46 28 /* mutex between sync_cpu_buffers() and the
drivers/oprofile/oprof.c
... ... @@ -53,9 +53,24 @@
53 53 * us missing task deaths and eventually oopsing
54 54 * when trying to process the event buffer.
55 55 */
  56 + if (oprofile_ops.sync_start) {
  57 + int sync_ret = oprofile_ops.sync_start();
  58 + switch (sync_ret) {
  59 + case 0:
  60 + goto post_sync;
  61 + case 1:
  62 + goto do_generic;
  63 + case -1:
  64 + goto out3;
  65 + default:
  66 + goto out3;
  67 + }
  68 + }
  69 +do_generic:
56 70 if ((err = sync_start()))
57 71 goto out3;
58 72  
  73 +post_sync:
59 74 is_setup = 1;
60 75 mutex_unlock(&start_mutex);
61 76 return 0;
62 77  
... ... @@ -118,7 +133,20 @@
118 133 void oprofile_shutdown(void)
119 134 {
120 135 mutex_lock(&start_mutex);
  136 + if (oprofile_ops.sync_stop) {
  137 + int sync_ret = oprofile_ops.sync_stop();
  138 + switch (sync_ret) {
  139 + case 0:
  140 + goto post_sync;
  141 + case 1:
  142 + goto do_generic;
  143 + default:
  144 + goto post_sync;
  145 + }
  146 + }
  147 +do_generic:
121 148 sync_stop();
  149 +post_sync:
122 150 if (oprofile_ops.shutdown)
123 151 oprofile_ops.shutdown();
124 152 is_setup = 0;
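
The switch statements above encode a small contract for the new hooks: 0 means
the arch handled buffer syncing itself, 1 means fall through to the generic
sync_start()/sync_stop(), and a negative value fails the setup. A hedged
sketch of a conforming sync_start, with both my_* helpers as hypothetical
stubs standing in for a check like Cell's spu_cycle_reset test:

static int my_arch_needs_own_sync(void) { return 0; }	/* stub */
static int my_arch_start_own_sync(void) { return 0; }	/* stub */

static int my_sync_start(void)
{
	if (!my_arch_needs_own_sync())
		return 1;	/* run the generic sync_start() */

	if (my_arch_start_own_sync())
		return -1;	/* oprofile_setup() unwinds via out3 */

	return 0;		/* synced; skip the generic path */
}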
include/asm-powerpc/oprofile_impl.h
... ... @@ -39,14 +39,16 @@
39 39  
40 40 /* Per-arch configuration */
41 41 struct op_powerpc_model {
42   - void (*reg_setup) (struct op_counter_config *,
  42 + int (*reg_setup) (struct op_counter_config *,
43 43 struct op_system_config *,
44 44 int num_counters);
45   - void (*cpu_setup) (struct op_counter_config *);
46   - void (*start) (struct op_counter_config *);
47   - void (*global_start) (struct op_counter_config *);
  45 + int (*cpu_setup) (struct op_counter_config *);
  46 + int (*start) (struct op_counter_config *);
  47 + int (*global_start) (struct op_counter_config *);
48 48 void (*stop) (void);
49 49 void (*global_stop) (void);
  50 + int (*sync_start)(void);
  51 + int (*sync_stop)(void);
50 52 void (*handle_interrupt) (struct pt_regs *,
51 53 struct op_counter_config *);
52 54 int num_counters;
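
With every setup/start hook now returning int, a model is wired up as below.
A hedged sketch; every my_* name is a hypothetical stand-in and only
struct op_powerpc_model as declared in this hunk is assumed:

#include <asm/oprofile_impl.h>

static int my_reg_setup(struct op_counter_config *ctr,
			struct op_system_config *sys, int num_ctrs)
{
	return 0;	/* 0 on success, -errno aborts the setup */
}

static int my_cpu_setup(struct op_counter_config *ctr)
{
	return 0;
}

static int my_global_start(struct op_counter_config *ctr)
{
	return 0;
}

/* the stop hooks stay void; see the cell_global_stop_spu() comment */
static void my_global_stop(void)
{
}

struct op_powerpc_model op_model_mine = {
	.reg_setup	= my_reg_setup,
	.cpu_setup	= my_cpu_setup,
	.global_start	= my_global_start,
	.global_stop	= my_global_stop,
};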
include/asm-powerpc/spu.h
... ... @@ -138,6 +138,7 @@
138 138 struct spu_runqueue *rq;
139 139 unsigned long long timestamp;
140 140 pid_t pid;
  141 + pid_t tgid;
141 142 int class_0_pending;
142 143 spinlock_t register_lock;
143 144  
... ... @@ -216,6 +217,20 @@
216 217 /* Calls from the memory management to the SPU */
217 218 struct mm_struct;
218 219 extern void spu_flush_all_slbs(struct mm_struct *mm);
  220 +
  221 +/* This interface allows a profiler (e.g., OProfile) to store a ref
  222 + * to spu context information that it creates. This caching technique
  223 + * avoids the need to recreate this information after a save/restore operation.
  224 + *
  225 + * Assumes the caller has already incremented the ref count on
  226 + * profile_info; then spu_context_destroy must call kref_put
  227 + * on prof_info_kref.
  228 + */
  229 +void spu_set_profile_private_kref(struct spu_context *ctx,
  230 + struct kref *prof_info_kref,
  231 + void ( * prof_info_release) (struct kref *kref));
  232 +
  233 +void *spu_get_profile_private_kref(struct spu_context *ctx);
219 234  
220 235 /* system callbacks from the SPU */
221 236 struct spu_syscall_block {
include/linux/dcookies.h
... ... @@ -12,6 +12,7 @@
12 12  
13 13 #ifdef CONFIG_PROFILING
14 14  
  15 +#include <linux/dcache.h>
15 16 #include <linux/types.h>
16 17  
17 18 struct dcookie_user;
include/linux/elf-em.h
... ... @@ -20,7 +20,8 @@
20 20 #define EM_PARISC 15 /* HPPA */
21 21 #define EM_SPARC32PLUS 18 /* Sun's "v8plus" */
22 22 #define EM_PPC 20 /* PowerPC */
23   -#define EM_PPC64 21 /* PowerPC64 */
  23 +#define EM_PPC64 21 /* PowerPC64 */
  24 +#define EM_SPU 23 /* Cell BE SPU */
24 25 #define EM_SH 42 /* SuperH */
25 26 #define EM_SPARCV9 43 /* SPARC v9 64-bit */
26 27 #define EM_IA_64 50 /* HP/Intel IA-64 */
include/linux/oprofile.h
... ... @@ -17,6 +17,26 @@
17 17 #include <linux/spinlock.h>
18 18 #include <asm/atomic.h>
19 19  
  20 +/* Each escaped entry is prefixed by ESCAPE_CODE
  21 + * then one of the following codes, then the
  22 + * relevant data.
  23 + * These #defines live in this file so that arch-specific
  24 + * buffer sync'ing code can access them.
  25 + */
  26 +#define ESCAPE_CODE ~0UL
  27 +#define CTX_SWITCH_CODE 1
  28 +#define CPU_SWITCH_CODE 2
  29 +#define COOKIE_SWITCH_CODE 3
  30 +#define KERNEL_ENTER_SWITCH_CODE 4
  31 +#define KERNEL_EXIT_SWITCH_CODE 5
  32 +#define MODULE_LOADED_CODE 6
  33 +#define CTX_TGID_CODE 7
  34 +#define TRACE_BEGIN_CODE 8
  35 +#define TRACE_END_CODE 9
  36 +#define XEN_ENTER_SWITCH_CODE 10
  37 +#define SPU_PROFILING_CODE 11
  38 +#define SPU_CTX_SWITCH_CODE 12
  39 +
20 40 struct super_block;
21 41 struct dentry;
22 42 struct file_operations;
... ... @@ -35,6 +55,14 @@
35 55 int (*start)(void);
36 56 /* Stop delivering interrupts. */
37 57 void (*stop)(void);
  58 + /* Arch-specific buffer sync functions.
  59 + * Return value = 0: Success
  60 + * Return value = -1: Failure
  61 + * Return value = 1: Run generic sync function
  62 + */
  63 + int (*sync_start)(void);
  64 + int (*sync_stop)(void);
  65 +
38 66 /* Initiate a stack backtrace. Optional. */
39 67 void (*backtrace)(struct pt_regs * const regs, unsigned int depth);
40 68 /* CPU identification string. */
... ... @@ -54,6 +82,13 @@
54 82 * One-time exit/cleanup for the arch.
55 83 */
56 84 void oprofile_arch_exit(void);
  85 +
  86 +/**
  87 + * Add data to the event buffer.
  88 + * The data passed is free-form, but typically consists of
  89 + * file offsets, dcookies, context information, and ESCAPE codes.
  90 + */
  91 +void add_event_entry(unsigned long data);
57 92  
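
With the escape codes exported here alongside add_event_entry(), an
arch-specific sync routine can emit escaped records directly into the event
buffer. A hedged sketch; the two payload words are illustrative only, since
the real SPU record layout is defined by the Cell sync code rather than by
this header:

#include <linux/oprofile.h>

static void example_emit_spu_switch(unsigned long spu_num,
				    unsigned long tgid)
{
	add_event_entry(ESCAPE_CODE);		/* flag an escaped entry */
	add_event_entry(SPU_CTX_SWITCH_CODE);	/* record type */
	add_event_entry(spu_num);		/* illustrative payload */
	add_event_entry(tgid);
}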
58 93 /**
59 94 * Add a sample. This may be called from any context. Pass