Commit 1b3862798cf4390b9110e54e405646e156f47c83

Authored by Linus Torvalds

Merge branch 'tools' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux-idle-2.6

* 'tools' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux-idle-2.6:
  tools: create power/x86/x86_energy_perf_policy
  tools: create power/x86/turbostat

Showing 6 changed files Side-by-side Diff

tools/power/x86/turbostat/Makefile
# Build, clean and install rules for turbostat.
# "turbostat" is built from turbostat.c by make's implicit rule.
turbostat : turbostat.c

# clean and install do not produce files of those names; declare them phony
# so a stray file called "clean" or "install" cannot mask the rule.
.PHONY : clean install

clean :
	rm -f turbostat

install :
	install turbostat /usr/bin/turbostat
	install turbostat.8 /usr/share/man/man8
tools/power/x86/turbostat/turbostat.8
  1 +.TH TURBOSTAT 8
  2 +.SH NAME
  3 +turbostat \- Report processor frequency and idle statistics
  4 +.SH SYNOPSIS
  5 +.ft B
  6 +.B turbostat
  7 +.RB [ "\-v" ]
  8 +.RB [ "\-M MSR#" ]
  9 +.RB command
  10 +.br
  11 +.B turbostat
  12 +.RB [ "\-v" ]
  13 +.RB [ "\-M MSR#" ]
  14 +.RB [ "\-i interval_sec" ]
  15 +.SH DESCRIPTION
  16 +\fBturbostat \fP reports processor topology, frequency
  17 +and idle power state statistics on modern X86 processors.
  18 +Either \fBcommand\fP is forked and statistics are printed
  19 +upon its completion, or statistics are printed periodically.
  20 +
  21 +\fBturbostat \fP
  22 +requires that the processor
  23 +supports an "invariant" TSC, plus the APERF and MPERF MSRs.
  24 +\fBturbostat \fP will report idle cpu power state residency
  25 +on processors that additionally support C-state residency counters.
  26 +
  27 +.SS Options
  28 +The \fB-v\fP option increases verbosity.
  29 +.PP
  30 +The \fB-M MSR#\fP option dumps the specified MSR,
  31 +in addition to the usual frequency and idle statistics.
  32 +.PP
  33 +The \fB-i interval_sec\fP option prints statistics every \fIinterval_sec\fP seconds.
  34 +The default is 5 seconds.
  35 +.PP
  36 +The \fBcommand\fP parameter forks \fBcommand\fP and upon its exit,
  37 +displays the statistics gathered since it was forked.
  38 +.PP
  39 +.SH FIELD DESCRIPTIONS
  40 +.nf
  41 +\fBpkg\fP processor package number.
  42 +\fBcore\fP processor core number.
  43 +\fBCPU\fP Linux CPU (logical processor) number.
  44 +\fB%c0\fP percent of the interval that the CPU retired instructions.
  45 +\fBGHz\fP average clock rate while the CPU was in c0 state.
  46 +\fBTSC\fP average GHz that the TSC ran during the entire interval.
  47 +\fB%c1, %c3, %c6\fP show the percentage residency in hardware core idle states.
  48 +\fB%pc3, %pc6\fP percentage residency in hardware package idle states.
  49 +.fi
  50 +.PP
  51 +.SH EXAMPLE
  52 +Without any parameters, turbostat prints out counters every 5 seconds.
  53 +(override interval with "-i sec" option, or specify a command
  54 +for turbostat to fork).
  55 +
  56 +The first row of statistics reflects the average for the entire system.
  57 +Subsequent rows show per-CPU statistics.
  58 +
  59 +.nf
  60 +[root@x980]# ./turbostat
  61 +core CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6
  62 + 0.04 1.62 3.38 0.11 0.00 99.85 0.00 95.07
  63 + 0 0 0.04 1.62 3.38 0.06 0.00 99.90 0.00 95.07
  64 + 0 6 0.02 1.62 3.38 0.08 0.00 99.90 0.00 95.07
  65 + 1 2 0.10 1.62 3.38 0.29 0.00 99.61 0.00 95.07
  66 + 1 8 0.11 1.62 3.38 0.28 0.00 99.61 0.00 95.07
  67 + 2 4 0.01 1.62 3.38 0.01 0.00 99.98 0.00 95.07
  68 + 2 10 0.01 1.61 3.38 0.02 0.00 99.98 0.00 95.07
  69 + 8 1 0.07 1.62 3.38 0.15 0.00 99.78 0.00 95.07
  70 + 8 7 0.03 1.62 3.38 0.19 0.00 99.78 0.00 95.07
  71 + 9 3 0.01 1.62 3.38 0.02 0.00 99.98 0.00 95.07
  72 + 9 9 0.01 1.62 3.38 0.02 0.00 99.98 0.00 95.07
  73 + 10 5 0.01 1.62 3.38 0.13 0.00 99.86 0.00 95.07
  74 + 10 11 0.08 1.62 3.38 0.05 0.00 99.86 0.00 95.07
  75 +.fi
  76 +.SH VERBOSE EXAMPLE
  77 +The "-v" option adds verbosity to the output:
  78 +
  79 +.nf
  80 +GenuineIntel 11 CPUID levels; family:model:stepping 0x6:2c:2 (6:44:2)
  81 +12 * 133 = 1600 MHz max efficiency
  82 +25 * 133 = 3333 MHz TSC frequency
  83 +26 * 133 = 3467 MHz max turbo 4 active cores
  84 +26 * 133 = 3467 MHz max turbo 3 active cores
  85 +27 * 133 = 3600 MHz max turbo 2 active cores
  86 +27 * 133 = 3600 MHz max turbo 1 active cores
  87 +
  88 +.fi
  89 +The \fBmax efficiency\fP frequency, a.k.a. Low Frequency Mode, is the frequency
  90 +available at the minimum package voltage. The \fBTSC frequency\fP is the nominal
  91 +maximum frequency of the processor if turbo-mode were not available. This frequency
  92 +should be sustainable on all CPUs indefinitely, given nominal power and cooling.
  93 +The remaining rows show what maximum turbo frequency is possible
  94 +depending on the number of idle cores. Note that this information is
  95 +not available on all processors.
  96 +.SH FORK EXAMPLE
  97 +If turbostat is invoked with a command, it will fork that command
  98 +and output the statistics gathered when the command exits.
  99 +eg. Here a cycle soaker is run on 1 CPU (see %c0) for a few seconds
  100 +until ^C while the other CPUs are mostly idle:
  101 +
  102 +.nf
  103 +[root@x980 lenb]# ./turbostat cat /dev/zero > /dev/null
  104 +
  105 +^Ccore CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6
  106 + 8.49 3.63 3.38 16.23 0.66 74.63 0.00 0.00
  107 + 0 0 1.22 3.62 3.38 32.18 0.00 66.60 0.00 0.00
  108 + 0 6 0.40 3.61 3.38 33.00 0.00 66.60 0.00 0.00
  109 + 1 2 0.11 3.14 3.38 0.19 3.95 95.75 0.00 0.00
  110 + 1 8 0.05 2.88 3.38 0.25 3.95 95.75 0.00 0.00
  111 + 2 4 0.00 3.13 3.38 0.02 0.00 99.98 0.00 0.00
  112 + 2 10 0.00 3.09 3.38 0.02 0.00 99.98 0.00 0.00
  113 + 8 1 0.04 3.50 3.38 14.43 0.00 85.54 0.00 0.00
  114 + 8 7 0.03 2.98 3.38 14.43 0.00 85.54 0.00 0.00
  115 + 9 3 0.00 3.16 3.38 100.00 0.00 0.00 0.00 0.00
  116 + 9 9 99.93 3.63 3.38 0.06 0.00 0.00 0.00 0.00
  117 + 10 5 0.01 2.82 3.38 0.08 0.00 99.91 0.00 0.00
  118 + 10 11 0.02 3.36 3.38 0.06 0.00 99.91 0.00 0.00
  119 +6.950866 sec
  120 +
  121 +.fi
  122 +Above, the cycle soaker drives cpu9 up to its 3.6 GHz turbo limit
  123 +while the other processors are generally in various states of idle.
  124 +
  125 +Note that cpu3 is an HT sibling sharing core9
  126 +with cpu9, and thus it is unable to get to an idle state
  127 +deeper than c1 while cpu9 is busy.
  128 +
  129 +Note that turbostat reports average GHz of 3.63, while
  130 +the arithmetic average of the GHz column above is 3.24.
  131 +This is a weighted average, where the weight is %c0. ie. it is the total number of
  132 +un-halted cycles elapsed per time divided by the number of CPUs.
  133 +.SH NOTES
  134 +
  135 +.B "turbostat "
  136 +must be run as root.
  137 +
  138 +.B "turbostat "
  139 +reads hardware counters, but doesn't write them.
  140 +So it will not interfere with the OS or other programs, including
  141 +multiple invocations of itself.
  142 +
  143 +\fBturbostat \fP
  144 +may work poorly on Linux-2.6.20 through 2.6.29,
  145 +as \fBacpi-cpufreq \fPperiodically cleared the APERF and MPERF
  146 +in those kernels.
  147 +
  148 +The APERF, MPERF MSRs are defined to count non-halted cycles.
  149 +Although it is not guaranteed by the architecture, turbostat assumes
  150 +that they count at TSC rate, which is true on all processors tested to date.
  151 +
  152 +.SH REFERENCES
  153 +"Intel® Turbo Boost Technology
  154 +in Intel® Core™ Microarchitecture (Nehalem) Based Processors"
  155 +http://download.intel.com/design/processor/applnots/320354.pdf
  156 +
  157 +"Intel® 64 and IA-32 Architectures Software Developer's Manual
  158 +Volume 3B: System Programming Guide"
  159 +http://www.intel.com/products/processor/manuals/
  160 +
  161 +.SH FILES
  162 +.ta
  163 +.nf
  164 +/dev/cpu/*/msr
  165 +.fi
  166 +
  167 +.SH "SEE ALSO"
  168 +msr(4), vmstat(8)
  169 +.PP
  170 +.SH AUTHORS
  171 +.nf
  172 +Written by Len Brown <len.brown@intel.com>
tools/power/x86/turbostat/turbostat.c
Changes suppressed. Click to show
  1 +/*
  2 + * turbostat -- show CPU frequency and C-state residency
  3 + * on modern Intel turbo-capable processors.
  4 + *
  5 + * Copyright (c) 2010, Intel Corporation.
  6 + * Len Brown <len.brown@intel.com>
  7 + *
  8 + * This program is free software; you can redistribute it and/or modify it
  9 + * under the terms and conditions of the GNU General Public License,
  10 + * version 2, as published by the Free Software Foundation.
  11 + *
  12 + * This program is distributed in the hope it will be useful, but WITHOUT
  13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  15 + * more details.
  16 + *
  17 + * You should have received a copy of the GNU General Public License along with
  18 + * this program; if not, write to the Free Software Foundation, Inc.,
  19 + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  20 + */
  21 +
  22 +#include <stdio.h>
  23 +#include <unistd.h>
  24 +#include <sys/types.h>
  25 +#include <sys/wait.h>
  26 +#include <sys/stat.h>
  27 +#include <sys/resource.h>
  28 +#include <fcntl.h>
  29 +#include <signal.h>
  30 +#include <sys/time.h>
  31 +#include <stdlib.h>
  32 +#include <dirent.h>
  33 +#include <string.h>
  34 +#include <ctype.h>
  35 +
  36 +#define MSR_TSC 0x10
  37 +#define MSR_NEHALEM_PLATFORM_INFO 0xCE
  38 +#define MSR_NEHALEM_TURBO_RATIO_LIMIT 0x1AD
  39 +#define MSR_APERF 0xE8
  40 +#define MSR_MPERF 0xE7
  41 +#define MSR_PKG_C2_RESIDENCY 0x60D /* SNB only */
  42 +#define MSR_PKG_C3_RESIDENCY 0x3F8
  43 +#define MSR_PKG_C6_RESIDENCY 0x3F9
  44 +#define MSR_PKG_C7_RESIDENCY 0x3FA /* SNB only */
  45 +#define MSR_CORE_C3_RESIDENCY 0x3FC
  46 +#define MSR_CORE_C6_RESIDENCY 0x3FD
  47 +#define MSR_CORE_C7_RESIDENCY 0x3FE /* SNB only */
  48 +
  49 +char *proc_stat = "/proc/stat";
  50 +unsigned int interval_sec = 5; /* set with -i interval_sec */
  51 +unsigned int verbose; /* set with -v */
  52 +unsigned int skip_c0;
  53 +unsigned int skip_c1;
  54 +unsigned int do_nhm_cstates;
  55 +unsigned int do_snb_cstates;
  56 +unsigned int has_aperf;
  57 +unsigned int units = 1000000000; /* Ghz etc */
  58 +unsigned int genuine_intel;
  59 +unsigned int has_invariant_tsc;
  60 +unsigned int do_nehalem_platform_info;
  61 +unsigned int do_nehalem_turbo_ratio_limit;
  62 +unsigned int extra_msr_offset;
  63 +double bclk;
  64 +unsigned int show_pkg;
  65 +unsigned int show_core;
  66 +unsigned int show_cpu;
  67 +
  68 +int aperf_mperf_unstable;
  69 +int backwards_count;
  70 +char *progname;
  71 +int need_reinitialize;
  72 +
  73 +int num_cpus;
  74 +
/*
 * One set of counter values for a single logical CPU.
 * Nodes are chained through "next" into singly linked lists.
 */
typedef struct per_cpu_counters {
	/* per hardware thread */
	unsigned long long tsc;
	unsigned long long aperf;
	unsigned long long mperf;
	unsigned long long c1;		/* calculated rather than read */

	/* per core */
	unsigned long long c3;
	unsigned long long c6;
	unsigned long long c7;

	/* per package */
	unsigned long long pc2;
	unsigned long long pc3;
	unsigned long long pc6;
	unsigned long long pc7;

	/* per hardware thread: the MSR requested with -M */
	unsigned long long extra_msr;

	/* topology of this CPU */
	int pkg;
	int core;
	int cpu;

	struct per_cpu_counters *next;	/* next list node, or NULL */
} PCC;
  93 +
  94 +PCC *pcc_even;
  95 +PCC *pcc_odd;
  96 +PCC *pcc_delta;
  97 +PCC *pcc_average;
  98 +struct timeval tv_even;
  99 +struct timeval tv_odd;
  100 +struct timeval tv_delta;
  101 +
  102 +unsigned long long get_msr(int cpu, off_t offset)
  103 +{
  104 + ssize_t retval;
  105 + unsigned long long msr;
  106 + char pathname[32];
  107 + int fd;
  108 +
  109 + sprintf(pathname, "/dev/cpu/%d/msr", cpu);
  110 + fd = open(pathname, O_RDONLY);
  111 + if (fd < 0) {
  112 + perror(pathname);
  113 + need_reinitialize = 1;
  114 + return 0;
  115 + }
  116 +
  117 + retval = pread(fd, &msr, sizeof msr, offset);
  118 + if (retval != sizeof msr) {
  119 + fprintf(stderr, "cpu%d pread(..., 0x%zx) = %jd\n",
  120 + cpu, offset, retval);
  121 + exit(-2);
  122 + }
  123 +
  124 + close(fd);
  125 + return msr;
  126 +}
  127 +
  128 +void print_header()
  129 +{
  130 + if (show_pkg)
  131 + fprintf(stderr, "pkg ");
  132 + if (show_core)
  133 + fprintf(stderr, "core");
  134 + if (show_cpu)
  135 + fprintf(stderr, " CPU");
  136 + if (do_nhm_cstates)
  137 + fprintf(stderr, " %%c0 ");
  138 + if (has_aperf)
  139 + fprintf(stderr, " GHz");
  140 + fprintf(stderr, " TSC");
  141 + if (do_nhm_cstates)
  142 + fprintf(stderr, " %%c1 ");
  143 + if (do_nhm_cstates)
  144 + fprintf(stderr, " %%c3 ");
  145 + if (do_nhm_cstates)
  146 + fprintf(stderr, " %%c6 ");
  147 + if (do_snb_cstates)
  148 + fprintf(stderr, " %%c7 ");
  149 + if (do_snb_cstates)
  150 + fprintf(stderr, " %%pc2 ");
  151 + if (do_nhm_cstates)
  152 + fprintf(stderr, " %%pc3 ");
  153 + if (do_nhm_cstates)
  154 + fprintf(stderr, " %%pc6 ");
  155 + if (do_snb_cstates)
  156 + fprintf(stderr, " %%pc7 ");
  157 + if (extra_msr_offset)
  158 + fprintf(stderr, " MSR 0x%x ", extra_msr_offset);
  159 +
  160 + putc('\n', stderr);
  161 +}
  162 +
  163 +void dump_pcc(PCC *pcc)
  164 +{
  165 + fprintf(stderr, "package: %d ", pcc->pkg);
  166 + fprintf(stderr, "core:: %d ", pcc->core);
  167 + fprintf(stderr, "CPU: %d ", pcc->cpu);
  168 + fprintf(stderr, "TSC: %016llX\n", pcc->tsc);
  169 + fprintf(stderr, "c3: %016llX\n", pcc->c3);
  170 + fprintf(stderr, "c6: %016llX\n", pcc->c6);
  171 + fprintf(stderr, "c7: %016llX\n", pcc->c7);
  172 + fprintf(stderr, "aperf: %016llX\n", pcc->aperf);
  173 + fprintf(stderr, "pc2: %016llX\n", pcc->pc2);
  174 + fprintf(stderr, "pc3: %016llX\n", pcc->pc3);
  175 + fprintf(stderr, "pc6: %016llX\n", pcc->pc6);
  176 + fprintf(stderr, "pc7: %016llX\n", pcc->pc7);
  177 + fprintf(stderr, "msr0x%x: %016llX\n", extra_msr_offset, pcc->extra_msr);
  178 +}
  179 +
  180 +void dump_list(PCC *pcc)
  181 +{
  182 + printf("dump_list 0x%p\n", pcc);
  183 +
  184 + for (; pcc; pcc = pcc->next)
  185 + dump_pcc(pcc);
  186 +}
  187 +
  188 +void print_pcc(PCC *p)
  189 +{
  190 + double interval_float;
  191 +
  192 + interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0;
  193 +
  194 + /* topology columns, print blanks on 1st (average) line */
  195 + if (p == pcc_average) {
  196 + if (show_pkg)
  197 + fprintf(stderr, " ");
  198 + if (show_core)
  199 + fprintf(stderr, " ");
  200 + if (show_cpu)
  201 + fprintf(stderr, " ");
  202 + } else {
  203 + if (show_pkg)
  204 + fprintf(stderr, "%4d", p->pkg);
  205 + if (show_core)
  206 + fprintf(stderr, "%4d", p->core);
  207 + if (show_cpu)
  208 + fprintf(stderr, "%4d", p->cpu);
  209 + }
  210 +
  211 + /* %c0 */
  212 + if (do_nhm_cstates) {
  213 + if (!skip_c0)
  214 + fprintf(stderr, "%7.2f", 100.0 * p->mperf/p->tsc);
  215 + else
  216 + fprintf(stderr, " ****");
  217 + }
  218 +
  219 + /* GHz */
  220 + if (has_aperf) {
  221 + if (!aperf_mperf_unstable) {
  222 + fprintf(stderr, "%5.2f",
  223 + 1.0 * p->tsc / units * p->aperf /
  224 + p->mperf / interval_float);
  225 + } else {
  226 + if (p->aperf > p->tsc || p->mperf > p->tsc) {
  227 + fprintf(stderr, " ****");
  228 + } else {
  229 + fprintf(stderr, "%4.1f*",
  230 + 1.0 * p->tsc /
  231 + units * p->aperf /
  232 + p->mperf / interval_float);
  233 + }
  234 + }
  235 + }
  236 +
  237 + /* TSC */
  238 + fprintf(stderr, "%5.2f", 1.0 * p->tsc/units/interval_float);
  239 +
  240 + if (do_nhm_cstates) {
  241 + if (!skip_c1)
  242 + fprintf(stderr, "%7.2f", 100.0 * p->c1/p->tsc);
  243 + else
  244 + fprintf(stderr, " ****");
  245 + }
  246 + if (do_nhm_cstates)
  247 + fprintf(stderr, "%7.2f", 100.0 * p->c3/p->tsc);
  248 + if (do_nhm_cstates)
  249 + fprintf(stderr, "%7.2f", 100.0 * p->c6/p->tsc);
  250 + if (do_snb_cstates)
  251 + fprintf(stderr, "%7.2f", 100.0 * p->c7/p->tsc);
  252 + if (do_snb_cstates)
  253 + fprintf(stderr, "%7.2f", 100.0 * p->pc2/p->tsc);
  254 + if (do_nhm_cstates)
  255 + fprintf(stderr, "%7.2f", 100.0 * p->pc3/p->tsc);
  256 + if (do_nhm_cstates)
  257 + fprintf(stderr, "%7.2f", 100.0 * p->pc6/p->tsc);
  258 + if (do_snb_cstates)
  259 + fprintf(stderr, "%7.2f", 100.0 * p->pc7/p->tsc);
  260 + if (extra_msr_offset)
  261 + fprintf(stderr, " 0x%016llx", p->extra_msr);
  262 + putc('\n', stderr);
  263 +}
  264 +
  265 +void print_counters(PCC *cnt)
  266 +{
  267 + PCC *pcc;
  268 +
  269 + print_header();
  270 +
  271 + if (num_cpus > 1)
  272 + print_pcc(pcc_average);
  273 +
  274 + for (pcc = cnt; pcc != NULL; pcc = pcc->next)
  275 + print_pcc(pcc);
  276 +
  277 +}
  278 +
/*
 * SUBTRACT_COUNTER - store (after - before) in delta.
 * Evaluates to non-zero when before > after, i.e. the counter went
 * backwards.  Every argument is parenthesized so that expressions such as
 * "x + 1" can be passed safely; note that "after" and "before" are each
 * evaluated twice.
 */
#define SUBTRACT_COUNTER(after, before, delta) \
	((delta) = ((after) - (before)), ((before) > (after)))
  280 +
  281 +
  282 +int compute_delta(PCC *after, PCC *before, PCC *delta)
  283 +{
  284 + int errors = 0;
  285 + int perf_err = 0;
  286 +
  287 + skip_c0 = skip_c1 = 0;
  288 +
  289 + for ( ; after && before && delta;
  290 + after = after->next, before = before->next, delta = delta->next) {
  291 + if (before->cpu != after->cpu) {
  292 + printf("cpu configuration changed: %d != %d\n",
  293 + before->cpu, after->cpu);
  294 + return -1;
  295 + }
  296 +
  297 + if (SUBTRACT_COUNTER(after->tsc, before->tsc, delta->tsc)) {
  298 + fprintf(stderr, "cpu%d TSC went backwards %llX to %llX\n",
  299 + before->cpu, before->tsc, after->tsc);
  300 + errors++;
  301 + }
  302 + /* check for TSC < 1 Mcycles over interval */
  303 + if (delta->tsc < (1000 * 1000)) {
  304 + fprintf(stderr, "Insanely slow TSC rate,"
  305 + " TSC stops in idle?\n");
  306 + fprintf(stderr, "You can disable all c-states"
  307 + " by booting with \"idle=poll\"\n");
  308 + fprintf(stderr, "or just the deep ones with"
  309 + " \"processor.max_cstate=1\"\n");
  310 + exit(-3);
  311 + }
  312 + if (SUBTRACT_COUNTER(after->c3, before->c3, delta->c3)) {
  313 + fprintf(stderr, "cpu%d c3 counter went backwards %llX to %llX\n",
  314 + before->cpu, before->c3, after->c3);
  315 + errors++;
  316 + }
  317 + if (SUBTRACT_COUNTER(after->c6, before->c6, delta->c6)) {
  318 + fprintf(stderr, "cpu%d c6 counter went backwards %llX to %llX\n",
  319 + before->cpu, before->c6, after->c6);
  320 + errors++;
  321 + }
  322 + if (SUBTRACT_COUNTER(after->c7, before->c7, delta->c7)) {
  323 + fprintf(stderr, "cpu%d c7 counter went backwards %llX to %llX\n",
  324 + before->cpu, before->c7, after->c7);
  325 + errors++;
  326 + }
  327 + if (SUBTRACT_COUNTER(after->pc2, before->pc2, delta->pc2)) {
  328 + fprintf(stderr, "cpu%d pc2 counter went backwards %llX to %llX\n",
  329 + before->cpu, before->pc2, after->pc2);
  330 + errors++;
  331 + }
  332 + if (SUBTRACT_COUNTER(after->pc3, before->pc3, delta->pc3)) {
  333 + fprintf(stderr, "cpu%d pc3 counter went backwards %llX to %llX\n",
  334 + before->cpu, before->pc3, after->pc3);
  335 + errors++;
  336 + }
  337 + if (SUBTRACT_COUNTER(after->pc6, before->pc6, delta->pc6)) {
  338 + fprintf(stderr, "cpu%d pc6 counter went backwards %llX to %llX\n",
  339 + before->cpu, before->pc6, after->pc6);
  340 + errors++;
  341 + }
  342 + if (SUBTRACT_COUNTER(after->pc7, before->pc7, delta->pc7)) {
  343 + fprintf(stderr, "cpu%d pc7 counter went backwards %llX to %llX\n",
  344 + before->cpu, before->pc7, after->pc7);
  345 + errors++;
  346 + }
  347 +
  348 + perf_err = SUBTRACT_COUNTER(after->aperf, before->aperf, delta->aperf);
  349 + if (perf_err) {
  350 + fprintf(stderr, "cpu%d aperf counter went backwards %llX to %llX\n",
  351 + before->cpu, before->aperf, after->aperf);
  352 + }
  353 + perf_err |= SUBTRACT_COUNTER(after->mperf, before->mperf, delta->mperf);
  354 + if (perf_err) {
  355 + fprintf(stderr, "cpu%d mperf counter went backwards %llX to %llX\n",
  356 + before->cpu, before->mperf, after->mperf);
  357 + }
  358 + if (perf_err) {
  359 + if (!aperf_mperf_unstable) {
  360 + fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname);
  361 + fprintf(stderr, "* Frequency results do not cover entire interval *\n");
  362 + fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n");
  363 +
  364 + aperf_mperf_unstable = 1;
  365 + }
  366 + /*
  367 + * mperf delta is likely a huge "positive" number
  368 + * can not use it for calculating c0 time
  369 + */
  370 + skip_c0 = 1;
  371 + skip_c1 = 1;
  372 + }
  373 +
  374 + /*
  375 + * As mperf and tsc collection are not atomic,
  376 + * it is possible for mperf's non-halted cycles
  377 + * to exceed TSC's all cycles: show c1 = 0% in that case.
  378 + */
  379 + if (delta->mperf > delta->tsc)
  380 + delta->c1 = 0;
  381 + else /* normal case, derive c1 */
  382 + delta->c1 = delta->tsc - delta->mperf
  383 + - delta->c3 - delta->c6 - delta->c7;
  384 +
  385 + if (delta->mperf == 0)
  386 + delta->mperf = 1; /* divide by 0 protection */
  387 +
  388 + /*
  389 + * for "extra msr", just copy the latest w/o subtracting
  390 + */
  391 + delta->extra_msr = after->extra_msr;
  392 + if (errors) {
  393 + fprintf(stderr, "ERROR cpu%d before:\n", before->cpu);
  394 + dump_pcc(before);
  395 + fprintf(stderr, "ERROR cpu%d after:\n", before->cpu);
  396 + dump_pcc(after);
  397 + errors = 0;
  398 + }
  399 + }
  400 + return 0;
  401 +}
  402 +
  403 +void compute_average(PCC *delta, PCC *avg)
  404 +{
  405 + PCC *sum;
  406 +
  407 + sum = calloc(1, sizeof(PCC));
  408 + if (sum == NULL) {
  409 + perror("calloc sum");
  410 + exit(1);
  411 + }
  412 +
  413 + for (; delta; delta = delta->next) {
  414 + sum->tsc += delta->tsc;
  415 + sum->c1 += delta->c1;
  416 + sum->c3 += delta->c3;
  417 + sum->c6 += delta->c6;
  418 + sum->c7 += delta->c7;
  419 + sum->aperf += delta->aperf;
  420 + sum->mperf += delta->mperf;
  421 + sum->pc2 += delta->pc2;
  422 + sum->pc3 += delta->pc3;
  423 + sum->pc6 += delta->pc6;
  424 + sum->pc7 += delta->pc7;
  425 + }
  426 + avg->tsc = sum->tsc/num_cpus;
  427 + avg->c1 = sum->c1/num_cpus;
  428 + avg->c3 = sum->c3/num_cpus;
  429 + avg->c6 = sum->c6/num_cpus;
  430 + avg->c7 = sum->c7/num_cpus;
  431 + avg->aperf = sum->aperf/num_cpus;
  432 + avg->mperf = sum->mperf/num_cpus;
  433 + avg->pc2 = sum->pc2/num_cpus;
  434 + avg->pc3 = sum->pc3/num_cpus;
  435 + avg->pc6 = sum->pc6/num_cpus;
  436 + avg->pc7 = sum->pc7/num_cpus;
  437 +
  438 + free(sum);
  439 +}
  440 +
  441 +void get_counters(PCC *pcc)
  442 +{
  443 + for ( ; pcc; pcc = pcc->next) {
  444 + pcc->tsc = get_msr(pcc->cpu, MSR_TSC);
  445 + if (do_nhm_cstates)
  446 + pcc->c3 = get_msr(pcc->cpu, MSR_CORE_C3_RESIDENCY);
  447 + if (do_nhm_cstates)
  448 + pcc->c6 = get_msr(pcc->cpu, MSR_CORE_C6_RESIDENCY);
  449 + if (do_snb_cstates)
  450 + pcc->c7 = get_msr(pcc->cpu, MSR_CORE_C7_RESIDENCY);
  451 + if (has_aperf)
  452 + pcc->aperf = get_msr(pcc->cpu, MSR_APERF);
  453 + if (has_aperf)
  454 + pcc->mperf = get_msr(pcc->cpu, MSR_MPERF);
  455 + if (do_snb_cstates)
  456 + pcc->pc2 = get_msr(pcc->cpu, MSR_PKG_C2_RESIDENCY);
  457 + if (do_nhm_cstates)
  458 + pcc->pc3 = get_msr(pcc->cpu, MSR_PKG_C3_RESIDENCY);
  459 + if (do_nhm_cstates)
  460 + pcc->pc6 = get_msr(pcc->cpu, MSR_PKG_C6_RESIDENCY);
  461 + if (do_snb_cstates)
  462 + pcc->pc7 = get_msr(pcc->cpu, MSR_PKG_C7_RESIDENCY);
  463 + if (extra_msr_offset)
  464 + pcc->extra_msr = get_msr(pcc->cpu, extra_msr_offset);
  465 + }
  466 +}
  467 +
  468 +
  469 +void print_nehalem_info()
  470 +{
  471 + unsigned long long msr;
  472 + unsigned int ratio;
  473 +
  474 + if (!do_nehalem_platform_info)
  475 + return;
  476 +
  477 + msr = get_msr(0, MSR_NEHALEM_PLATFORM_INFO);
  478 +
  479 + ratio = (msr >> 40) & 0xFF;
  480 + fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n",
  481 + ratio, bclk, ratio * bclk);
  482 +
  483 + ratio = (msr >> 8) & 0xFF;
  484 + fprintf(stderr, "%d * %.0f = %.0f MHz TSC frequency\n",
  485 + ratio, bclk, ratio * bclk);
  486 +
  487 + if (verbose > 1)
  488 + fprintf(stderr, "MSR_NEHALEM_PLATFORM_INFO: 0x%llx\n", msr);
  489 +
  490 + if (!do_nehalem_turbo_ratio_limit)
  491 + return;
  492 +
  493 + msr = get_msr(0, MSR_NEHALEM_TURBO_RATIO_LIMIT);
  494 +
  495 + ratio = (msr >> 24) & 0xFF;
  496 + if (ratio)
  497 + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 4 active cores\n",
  498 + ratio, bclk, ratio * bclk);
  499 +
  500 + ratio = (msr >> 16) & 0xFF;
  501 + if (ratio)
  502 + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 3 active cores\n",
  503 + ratio, bclk, ratio * bclk);
  504 +
  505 + ratio = (msr >> 8) & 0xFF;
  506 + if (ratio)
  507 + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 2 active cores\n",
  508 + ratio, bclk, ratio * bclk);
  509 +
  510 + ratio = (msr >> 0) & 0xFF;
  511 + if (ratio)
  512 + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 1 active cores\n",
  513 + ratio, bclk, ratio * bclk);
  514 +
  515 +}
  516 +
  517 +void free_counter_list(PCC *list)
  518 +{
  519 + PCC *p;
  520 +
  521 + for (p = list; p; ) {
  522 + PCC *free_me;
  523 +
  524 + free_me = p;
  525 + p = p->next;
  526 + free(free_me);
  527 + }
  528 + return;
  529 +}
  530 +
  531 +void free_all_counters(void)
  532 +{
  533 + free_counter_list(pcc_even);
  534 + pcc_even = NULL;
  535 +
  536 + free_counter_list(pcc_odd);
  537 + pcc_odd = NULL;
  538 +
  539 + free_counter_list(pcc_delta);
  540 + pcc_delta = NULL;
  541 +
  542 + free_counter_list(pcc_average);
  543 + pcc_average = NULL;
  544 +}
  545 +
  546 +void insert_cpu_counters(PCC **list, PCC *new)
  547 +{
  548 + PCC *prev;
  549 +
  550 + /*
  551 + * list was empty
  552 + */
  553 + if (*list == NULL) {
  554 + new->next = *list;
  555 + *list = new;
  556 + return;
  557 + }
  558 +
  559 + show_cpu = 1; /* there is more than one CPU */
  560 +
  561 + /*
  562 + * insert on front of list.
  563 + * It is sorted by ascending package#, core#, cpu#
  564 + */
  565 + if (((*list)->pkg > new->pkg) ||
  566 + (((*list)->pkg == new->pkg) && ((*list)->core > new->core)) ||
  567 + (((*list)->pkg == new->pkg) && ((*list)->core == new->core) && ((*list)->cpu > new->cpu))) {
  568 + new->next = *list;
  569 + *list = new;
  570 + return;
  571 + }
  572 +
  573 + prev = *list;
  574 +
  575 + while (prev->next && (prev->next->pkg < new->pkg)) {
  576 + prev = prev->next;
  577 + show_pkg = 1; /* there is more than 1 package */
  578 + }
  579 +
  580 + while (prev->next && (prev->next->pkg == new->pkg)
  581 + && (prev->next->core < new->core)) {
  582 + prev = prev->next;
  583 + show_core = 1; /* there is more than 1 core */
  584 + }
  585 +
  586 + while (prev->next && (prev->next->pkg == new->pkg)
  587 + && (prev->next->core == new->core)
  588 + && (prev->next->cpu < new->cpu)) {
  589 + prev = prev->next;
  590 + }
  591 +
  592 + /*
  593 + * insert after "prev"
  594 + */
  595 + new->next = prev->next;
  596 + prev->next = new;
  597 +
  598 + return;
  599 +}
  600 +
  601 +void alloc_new_cpu_counters(int pkg, int core, int cpu)
  602 +{
  603 + PCC *new;
  604 +
  605 + if (verbose > 1)
  606 + printf("pkg%d core%d, cpu%d\n", pkg, core, cpu);
  607 +
  608 + new = (PCC *)calloc(1, sizeof(PCC));
  609 + if (new == NULL) {
  610 + perror("calloc");
  611 + exit(1);
  612 + }
  613 + new->pkg = pkg;
  614 + new->core = core;
  615 + new->cpu = cpu;
  616 + insert_cpu_counters(&pcc_odd, new);
  617 +
  618 + new = (PCC *)calloc(1, sizeof(PCC));
  619 + if (new == NULL) {
  620 + perror("calloc");
  621 + exit(1);
  622 + }
  623 + new->pkg = pkg;
  624 + new->core = core;
  625 + new->cpu = cpu;
  626 + insert_cpu_counters(&pcc_even, new);
  627 +
  628 + new = (PCC *)calloc(1, sizeof(PCC));
  629 + if (new == NULL) {
  630 + perror("calloc");
  631 + exit(1);
  632 + }
  633 + new->pkg = pkg;
  634 + new->core = core;
  635 + new->cpu = cpu;
  636 + insert_cpu_counters(&pcc_delta, new);
  637 +
  638 + new = (PCC *)calloc(1, sizeof(PCC));
  639 + if (new == NULL) {
  640 + perror("calloc");
  641 + exit(1);
  642 + }
  643 + new->pkg = pkg;
  644 + new->core = core;
  645 + new->cpu = cpu;
  646 + pcc_average = new;
  647 +}
  648 +
/*
 * get_physical_package_id - read a CPU's package number from sysfs
 * @cpu: Linux CPU number
 *
 * Returns the integer stored in
 * /sys/devices/system/cpu/cpu<cpu>/topology/physical_package_id.
 * Exits with status 1 if the file cannot be opened or does not start
 * with an integer.
 */
int get_physical_package_id(int cpu)
{
	char path[80];
	FILE *filep;
	int pkg;

	snprintf(path, sizeof(path),
		"/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);
	filep = fopen(path, "r");
	if (filep == NULL) {
		perror(path);
		exit(1);
	}
	/* without this check a parse failure would return an uninitialized value */
	if (fscanf(filep, "%d", &pkg) != 1) {
		fprintf(stderr, "%s: could not parse package id\n", path);
		fclose(filep);
		exit(1);
	}
	fclose(filep);
	return pkg;
}
  665 +
/*
 * get_core_id - read a CPU's core number from sysfs
 * @cpu: Linux CPU number
 *
 * Returns the integer stored in
 * /sys/devices/system/cpu/cpu<cpu>/topology/core_id.
 * Exits with status 1 if the file cannot be opened or does not start
 * with an integer.
 */
int get_core_id(int cpu)
{
	char path[64];
	FILE *filep;
	int core;

	snprintf(path, sizeof(path),
		"/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);
	filep = fopen(path, "r");
	if (filep == NULL) {
		perror(path);
		exit(1);
	}
	/* without this check a parse failure would return an uninitialized value */
	if (fscanf(filep, "%d", &core) != 1) {
		fprintf(stderr, "%s: could not parse core id\n", path);
		fclose(filep);
		exit(1);
	}
	fclose(filep);
	return core;
}
  682 +
  683 +/*
  684 + * run func(index, cpu) on every cpu in /proc/stat
  685 + */
  686 +
  687 +int for_all_cpus(void (func)(int, int, int))
  688 +{
  689 + FILE *fp;
  690 + int cpu_count;
  691 + int retval;
  692 +
  693 + fp = fopen(proc_stat, "r");
  694 + if (fp == NULL) {
  695 + perror(proc_stat);
  696 + exit(1);
  697 + }
  698 +
  699 + retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
  700 + if (retval != 0) {
  701 + perror("/proc/stat format");
  702 + exit(1);
  703 + }
  704 +
  705 + for (cpu_count = 0; ; cpu_count++) {
  706 + int cpu;
  707 +
  708 + retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu);
  709 + if (retval != 1)
  710 + break;
  711 +
  712 + func(get_physical_package_id(cpu), get_core_id(cpu), cpu);
  713 + }
  714 + fclose(fp);
  715 + return cpu_count;
  716 +}
  717 +
  718 +void re_initialize(void)
  719 +{
  720 + printf("turbostat: topology changed, re-initializing.\n");
  721 + free_all_counters();
  722 + num_cpus = for_all_cpus(alloc_new_cpu_counters);
  723 + need_reinitialize = 0;
  724 + printf("num_cpus is now %d\n", num_cpus);
  725 +}
  726 +
/* No-op callback, used when for_all_cpus() is called only to count CPUs. */
void dummy(int pkg, int core, int cpu)
{
	(void)pkg;
	(void)core;
	(void)cpu;
}
  728 +/*
  729 + * check to see if a cpu came on-line
  730 + */
  731 +void verify_num_cpus()
  732 +{
  733 + int new_num_cpus;
  734 +
  735 + new_num_cpus = for_all_cpus(dummy);
  736 +
  737 + if (new_num_cpus != num_cpus) {
  738 + if (verbose)
  739 + printf("num_cpus was %d, is now %d\n",
  740 + num_cpus, new_num_cpus);
  741 + need_reinitialize = 1;
  742 + }
  743 +
  744 + return;
  745 +}
  746 +
  747 +void turbostat_loop()
  748 +{
  749 +restart:
  750 + get_counters(pcc_even);
  751 + gettimeofday(&tv_even, (struct timezone *)NULL);
  752 +
  753 + while (1) {
  754 + verify_num_cpus();
  755 + if (need_reinitialize) {
  756 + re_initialize();
  757 + goto restart;
  758 + }
  759 + sleep(interval_sec);
  760 + get_counters(pcc_odd);
  761 + gettimeofday(&tv_odd, (struct timezone *)NULL);
  762 +
  763 + compute_delta(pcc_odd, pcc_even, pcc_delta);
  764 + timersub(&tv_odd, &tv_even, &tv_delta);
  765 + compute_average(pcc_delta, pcc_average);
  766 + print_counters(pcc_delta);
  767 + if (need_reinitialize) {
  768 + re_initialize();
  769 + goto restart;
  770 + }
  771 + sleep(interval_sec);
  772 + get_counters(pcc_even);
  773 + gettimeofday(&tv_even, (struct timezone *)NULL);
  774 + compute_delta(pcc_even, pcc_odd, pcc_delta);
  775 + timersub(&tv_even, &tv_odd, &tv_delta);
  776 + compute_average(pcc_delta, pcc_average);
  777 + print_counters(pcc_delta);
  778 + }
  779 +}
  780 +
/*
 * check_dev_msr - make sure the MSR device node of cpu0 exists
 *
 * Prints a hint and exits with status -5 when stat() on /dev/cpu/0/msr
 * fails.
 */
void check_dev_msr()
{
	struct stat st;

	if (stat("/dev/cpu/0/msr", &st) == 0)
		return;

	fprintf(stderr, "no /dev/cpu/0/msr\n");
	fprintf(stderr, "Try \"# modprobe msr\"\n");
	exit(-5);
}
  791 +
/*
 * check_super_user - exit with status -6 unless the real user id is 0
 */
void check_super_user()
{
	if (getuid() == 0)
		return;

	fprintf(stderr, "must be root\n");
	exit(-6);
}
  799 +
  800 +int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model)
  801 +{
  802 + if (!genuine_intel)
  803 + return 0;
  804 +
  805 + if (family != 6)
  806 + return 0;
  807 +
  808 + switch (model) {
  809 + case 0x1A: /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */
  810 + case 0x1E: /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
  811 + case 0x1F: /* Core i7 and i5 Processor - Nehalem */
  812 + case 0x25: /* Westmere Client - Clarkdale, Arrandale */
  813 + case 0x2C: /* Westmere EP - Gulftown */
  814 + case 0x2A: /* SNB */
  815 + case 0x2D: /* SNB Xeon */
  816 + return 1;
  817 + case 0x2E: /* Nehalem-EX Xeon - Beckton */
  818 + case 0x2F: /* Westmere-EX Xeon - Eagleton */
  819 + default:
  820 + return 0;
  821 + }
  822 +}
  823 +
  824 +int is_snb(unsigned int family, unsigned int model)
  825 +{
  826 + if (!genuine_intel)
  827 + return 0;
  828 +
  829 + switch (model) {
  830 + case 0x2A:
  831 + case 0x2D:
  832 + return 1;
  833 + }
  834 + return 0;
  835 +}
  836 +
  837 +double discover_bclk(unsigned int family, unsigned int model)
  838 +{
  839 + if (is_snb(family, model))
  840 + return 100.00;
  841 + else
  842 + return 133.33;
  843 +}
  844 +
  845 +void check_cpuid()
  846 +{
  847 + unsigned int eax, ebx, ecx, edx, max_level;
  848 + unsigned int fms, family, model, stepping;
  849 +
  850 + eax = ebx = ecx = edx = 0;
  851 +
  852 + asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0));
  853 +
  854 + if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e)
  855 + genuine_intel = 1;
  856 +
  857 + if (verbose)
  858 + fprintf(stderr, "%.4s%.4s%.4s ",
  859 + (char *)&ebx, (char *)&edx, (char *)&ecx);
  860 +
  861 + asm("cpuid" : "=a" (fms), "=c" (ecx), "=d" (edx) : "a" (1) : "ebx");
  862 + family = (fms >> 8) & 0xf;
  863 + model = (fms >> 4) & 0xf;
  864 + stepping = fms & 0xf;
  865 + if (family == 6 || family == 0xf)
  866 + model += ((fms >> 16) & 0xf) << 4;
  867 +
  868 + if (verbose)
  869 + fprintf(stderr, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
  870 + max_level, family, model, stepping, family, model, stepping);
  871 +
  872 + if (!(edx & (1 << 5))) {
  873 + fprintf(stderr, "CPUID: no MSR\n");
  874 + exit(1);
  875 + }
  876 +
  877 + /*
  878 + * check max extended function levels of CPUID.
  879 + * This is needed to check for invariant TSC.
  880 + * This check is valid for both Intel and AMD.
  881 + */
  882 + ebx = ecx = edx = 0;
  883 + asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x80000000));
  884 +
  885 + if (max_level < 0x80000007) {
  886 + fprintf(stderr, "CPUID: no invariant TSC (max_level 0x%x)\n", max_level);
  887 + exit(1);
  888 + }
  889 +
  890 + /*
  891 + * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
  892 + * this check is valid for both Intel and AMD
  893 + */
  894 + asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x80000007));
  895 + has_invariant_tsc = edx && (1 << 8);
  896 +
  897 + if (!has_invariant_tsc) {
  898 + fprintf(stderr, "No invariant TSC\n");
  899 + exit(1);
  900 + }
  901 +
  902 + /*
  903 + * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
  904 + * this check is valid for both Intel and AMD
  905 + */
  906 +
  907 + asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x6));
  908 + has_aperf = ecx && (1 << 0);
  909 + if (!has_aperf) {
  910 + fprintf(stderr, "No APERF MSR\n");
  911 + exit(1);
  912 + }
  913 +
  914 + do_nehalem_platform_info = genuine_intel && has_invariant_tsc;
  915 + do_nhm_cstates = genuine_intel; /* all Intel w/ non-stop TSC have NHM counters */
  916 + do_snb_cstates = is_snb(family, model);
  917 + bclk = discover_bclk(family, model);
  918 +
  919 + do_nehalem_turbo_ratio_limit = has_nehalem_turbo_ratio_limit(family, model);
  920 +}
  921 +
  922 +
  923 +void usage()
  924 +{
  925 + fprintf(stderr, "%s: [-v] [-M MSR#] [-i interval_sec | command ...]\n",
  926 + progname);
  927 + exit(1);
  928 +}
  929 +
  930 +
  931 +/*
  932 + * in /dev/cpu/ return success for names that are numbers
  933 + * ie. filter out ".", "..", "microcode".
  934 + */
  935 +int dir_filter(const struct dirent *dirp)
  936 +{
  937 + if (isdigit(dirp->d_name[0]))
  938 + return 1;
  939 + else
  940 + return 0;
  941 +}
  942 +
  943 +int open_dev_cpu_msr(int dummy1)
  944 +{
  945 + return 0;
  946 +}
  947 +
  948 +void turbostat_init()
  949 +{
  950 + check_cpuid();
  951 +
  952 + check_dev_msr();
  953 + check_super_user();
  954 +
  955 + num_cpus = for_all_cpus(alloc_new_cpu_counters);
  956 +
  957 + if (verbose)
  958 + print_nehalem_info();
  959 +}
  960 +
  961 +int fork_it(char **argv)
  962 +{
  963 + int retval;
  964 + pid_t child_pid;
  965 + get_counters(pcc_even);
  966 + gettimeofday(&tv_even, (struct timezone *)NULL);
  967 +
  968 + child_pid = fork();
  969 + if (!child_pid) {
  970 + /* child */
  971 + execvp(argv[0], argv);
  972 + } else {
  973 + int status;
  974 +
  975 + /* parent */
  976 + if (child_pid == -1) {
  977 + perror("fork");
  978 + exit(1);
  979 + }
  980 +
  981 + signal(SIGINT, SIG_IGN);
  982 + signal(SIGQUIT, SIG_IGN);
  983 + if (waitpid(child_pid, &status, 0) == -1) {
  984 + perror("wait");
  985 + exit(1);
  986 + }
  987 + }
  988 + get_counters(pcc_odd);
  989 + gettimeofday(&tv_odd, (struct timezone *)NULL);
  990 + retval = compute_delta(pcc_odd, pcc_even, pcc_delta);
  991 +
  992 + timersub(&tv_odd, &tv_even, &tv_delta);
  993 + compute_average(pcc_delta, pcc_average);
  994 + if (!retval)
  995 + print_counters(pcc_delta);
  996 +
  997 + fprintf(stderr, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0);;
  998 +
  999 + return 0;
  1000 +}
  1001 +
  1002 +void cmdline(int argc, char **argv)
  1003 +{
  1004 + int opt;
  1005 +
  1006 + progname = argv[0];
  1007 +
  1008 + while ((opt = getopt(argc, argv, "+vi:M:")) != -1) {
  1009 + switch (opt) {
  1010 + case 'v':
  1011 + verbose++;
  1012 + break;
  1013 + case 'i':
  1014 + interval_sec = atoi(optarg);
  1015 + break;
  1016 + case 'M':
  1017 + sscanf(optarg, "%x", &extra_msr_offset);
  1018 + if (verbose > 1)
  1019 + fprintf(stderr, "MSR 0x%X\n", extra_msr_offset);
  1020 + break;
  1021 + default:
  1022 + usage();
  1023 + }
  1024 + }
  1025 +}
  1026 +
  1027 +int main(int argc, char **argv)
  1028 +{
  1029 + cmdline(argc, argv);
  1030 +
  1031 + if (verbose > 1)
  1032 + fprintf(stderr, "turbostat Dec 6, 2010"
  1033 + " - Len Brown <lenb@kernel.org>\n");
  1034 + if (verbose > 1)
  1035 + fprintf(stderr, "http://userweb.kernel.org/~lenb/acpi/utils/pmtools/turbostat/\n");
  1036 +
  1037 + turbostat_init();
  1038 +
  1039 + /*
  1040 + * if any params left, it must be a command to fork
  1041 + */
  1042 + if (argc - optind)
  1043 + return fork_it(argv + optind);
  1044 + else
  1045 + turbostat_loop();
  1046 +
  1047 + return 0;
  1048 +}
tools/power/x86/x86_energy_perf_policy/Makefile
  1 +x86_energy_perf_policy : x86_energy_perf_policy.c
  2 +
  3 +clean :
  4 + rm -f x86_energy_perf_policy
  5 +
  6 +install :
  7 + install x86_energy_perf_policy /usr/bin/
  8 + install x86_energy_perf_policy.8 /usr/share/man/man8/
tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8
  1 +.\" This page Copyright (C) 2010 Len Brown <len.brown@intel.com>
  2 +.\" Distributed under the GPL, Copyleft 1994.
  3 +.TH X86_ENERGY_PERF_POLICY 8
  4 +.SH NAME
  5 +x86_energy_perf_policy \- read or write MSR_IA32_ENERGY_PERF_BIAS
  6 +.SH SYNOPSIS
  7 +.ft B
  8 +.B x86_energy_perf_policy
  9 +.RB [ "\-c cpu" ]
  10 +.RB [ "\-v" ]
  11 +.RB "\-r"
  12 +.br
  13 +.B x86_energy_perf_policy
  14 +.RB [ "\-c cpu" ]
  15 +.RB [ "\-v" ]
  16 +.RB 'performance'
  17 +.br
  18 +.B x86_energy_perf_policy
  19 +.RB [ "\-c cpu" ]
  20 +.RB [ "\-v" ]
  21 +.RB 'normal'
  22 +.br
  23 +.B x86_energy_perf_policy
  24 +.RB [ "\-c cpu" ]
  25 +.RB [ "\-v" ]
  26 +.RB 'powersave'
  27 +.br
  28 +.B x86_energy_perf_policy
  29 +.RB [ "\-c cpu" ]
  30 +.RB [ "\-v" ]
  31 +.RB n
  32 +.br
  33 +.SH DESCRIPTION
  34 +\fBx86_energy_perf_policy\fP
  35 +allows software to convey
  36 +its policy for the relative importance of performance
  37 +versus energy savings to the processor.
  38 +
  39 +The processor uses this information in model-specific ways
  40 +when it must select trade-offs between performance and
  41 +energy efficiency.
  42 +
  43 +This policy hint does not supersede Processor Performance states
  44 +(P-states) or CPU Idle power states (C-states), but allows
  45 +software to have influence where it would otherwise be unable
  46 +to express a preference.
  47 +
  48 +For example, this setting may tell the hardware how
  49 +aggressively or conservatively to control frequency
  50 +in the "turbo range" above the explicitly OS-controlled
  51 +P-state frequency range. It may also tell the hardware
  52 +how aggressively it should enter the OS requested C-states.
  53 +
  54 +Support for this feature is indicated by CPUID.06H.ECX.bit3
  55 +per the Intel Architectures Software Developer's Manual.
  56 +
  57 +.SS Options
  58 +\fB-c\fP limits operation to a single CPU.
  59 +The default is to operate on all CPUs.
  60 +Note that MSR_IA32_ENERGY_PERF_BIAS is defined per
  61 +logical processor, but that the initial implementations
  62 +of the MSR were shared among all processors in each package.
  63 +.PP
  64 +\fB-v\fP increases verbosity. By default
  65 +x86_energy_perf_policy is silent.
  66 +.PP
  67 +\fB-r\fP is for "read-only" mode - the unchanged state
  68 +is read and displayed.
  69 +.PP
  70 +.I performance
  71 +Set a policy where performance is paramount.
  72 +The processor will be unwilling to sacrifice any performance
  73 +for the sake of energy saving. This is the hardware default.
  74 +.PP
  75 +.I normal
  76 +Set a policy with a normal balance between performance and energy efficiency.
  77 +The processor will tolerate minor performance compromise
  78 +for potentially significant energy savings.
  79 +This is a reasonable default for most desktops and servers.
  80 +.PP
  81 +.I powersave
  82 +Set a policy where the processor can accept
  83 +a measurable performance hit to maximize energy efficiency.
  84 +.PP
  85 +.I n
  86 +Set MSR_IA32_ENERGY_PERF_BIAS to the specified number.
  87 +The range of valid numbers is 0-15, where 0 is maximum
  88 +performance and 15 is maximum energy efficiency.
  89 +
  90 +.SH NOTES
  91 +.B "x86_energy_perf_policy "
  92 +runs only as root.
  93 +.SH FILES
  94 +.ta
  95 +.nf
  96 +/dev/cpu/*/msr
  97 +.fi
  98 +
  99 +.SH "SEE ALSO"
  100 +msr(4)
  101 +.PP
  102 +.SH AUTHORS
  103 +.nf
  104 +Written by Len Brown <len.brown@intel.com>
tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
  1 +/*
  2 + * x86_energy_perf_policy -- set the energy versus performance
  3 + * policy preference bias on recent X86 processors.
  4 + */
  5 +/*
  6 + * Copyright (c) 2010, Intel Corporation.
  7 + * Len Brown <len.brown@intel.com>
  8 + *
  9 + * This program is free software; you can redistribute it and/or modify it
  10 + * under the terms and conditions of the GNU General Public License,
  11 + * version 2, as published by the Free Software Foundation.
  12 + *
  13 + * This program is distributed in the hope it will be useful, but WITHOUT
  14 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  16 + * more details.
  17 + *
  18 + * You should have received a copy of the GNU General Public License along with
  19 + * this program; if not, write to the Free Software Foundation, Inc.,
  20 + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  21 + */
  22 +
  23 +#include <stdio.h>
  24 +#include <unistd.h>
  25 +#include <sys/types.h>
  26 +#include <sys/stat.h>
  27 +#include <sys/resource.h>
  28 +#include <fcntl.h>
  29 +#include <signal.h>
  30 +#include <sys/time.h>
  31 +#include <stdlib.h>
  32 +#include <string.h>
  33 +
  34 +unsigned int verbose; /* set with -v */
  35 +unsigned int read_only; /* set with -r */
  36 +char *progname;
  37 +unsigned long long new_bias;
  38 +int cpu = -1;
  39 +
  40 +/*
  41 + * Usage:
  42 + *
  43 + * -c cpu: limit action to a single CPU (default is all CPUs)
  44 + * -v: verbose output (can invoke more than once)
  45 + * -r: read-only, don't change any settings
  46 + *
  47 + * performance
  48 + * Performance is paramount.
  49 + * Unwilling to sacrifice any performance
  50 + * for the sake of energy saving. (hardware default)
  51 + *
  52 + * normal
  53 + * Can tolerate minor performance compromise
  54 + * for potentially significant energy savings.
  55 + * (reasonable default for most desktops and servers)
  56 + *
  57 + * powersave
  58 + * Can tolerate significant performance hit
  59 + * to maximize energy savings.
  60 + *
  61 + * n
  62 + * a numerical value to write to the underlying MSR.
  63 + */
  64 +void usage(void)
  65 +{
  66 + printf("%s: [-c cpu] [-v] "
  67 + "(-r | 'performance' | 'normal' | 'powersave' | n)\n",
  68 + progname);
  69 + exit(1);
  70 +}
  71 +
  72 +#define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0
  73 +
  74 +#define BIAS_PERFORMANCE 0
  75 +#define BIAS_BALANCE 6
  76 +#define BIAS_POWERSAVE 15
  77 +
  78 +void cmdline(int argc, char **argv)
  79 +{
  80 + int opt;
  81 +
  82 + progname = argv[0];
  83 +
  84 + while ((opt = getopt(argc, argv, "+rvc:")) != -1) {
  85 + switch (opt) {
  86 + case 'c':
  87 + cpu = atoi(optarg);
  88 + break;
  89 + case 'r':
  90 + read_only = 1;
  91 + break;
  92 + case 'v':
  93 + verbose++;
  94 + break;
  95 + default:
  96 + usage();
  97 + }
  98 + }
  99 + /* if -r, then should be no additional optind */
  100 + if (read_only && (argc > optind))
  101 + usage();
  102 +
  103 + /*
  104 + * if no -r , then must be one additional optind
  105 + */
  106 + if (!read_only) {
  107 +
  108 + if (argc != optind + 1) {
  109 + printf("must supply -r or policy param\n");
  110 + usage();
  111 + }
  112 +
  113 + if (!strcmp("performance", argv[optind])) {
  114 + new_bias = BIAS_PERFORMANCE;
  115 + } else if (!strcmp("normal", argv[optind])) {
  116 + new_bias = BIAS_BALANCE;
  117 + } else if (!strcmp("powersave", argv[optind])) {
  118 + new_bias = BIAS_POWERSAVE;
  119 + } else {
  120 + char *endptr;
  121 +
  122 + new_bias = strtoull(argv[optind], &endptr, 0);
  123 + if (endptr == argv[optind] ||
  124 + new_bias > BIAS_POWERSAVE) {
  125 + fprintf(stderr, "invalid value: %s\n",
  126 + argv[optind]);
  127 + usage();
  128 + }
  129 + }
  130 + }
  131 +}
  132 +
  133 +/*
  134 + * validate_cpuid()
  135 + * returns on success, quietly exits on failure (make verbose with -v)
  136 + */
  137 +void validate_cpuid(void)
  138 +{
  139 + unsigned int eax, ebx, ecx, edx, max_level;
  140 + char brand[16];
  141 + unsigned int fms, family, model, stepping;
  142 +
  143 + eax = ebx = ecx = edx = 0;
  144 +
  145 + asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx),
  146 + "=d" (edx) : "a" (0));
  147 +
  148 + if (ebx != 0x756e6547 || edx != 0x49656e69 || ecx != 0x6c65746e) {
  149 + if (verbose)
  150 + fprintf(stderr, "%.4s%.4s%.4s != GenuineIntel",
  151 + (char *)&ebx, (char *)&edx, (char *)&ecx);
  152 + exit(1);
  153 + }
  154 +
  155 + asm("cpuid" : "=a" (fms), "=c" (ecx), "=d" (edx) : "a" (1) : "ebx");
  156 + family = (fms >> 8) & 0xf;
  157 + model = (fms >> 4) & 0xf;
  158 + stepping = fms & 0xf;
  159 + if (family == 6 || family == 0xf)
  160 + model += ((fms >> 16) & 0xf) << 4;
  161 +
  162 + if (verbose > 1)
  163 + printf("CPUID %s %d levels family:model:stepping "
  164 + "0x%x:%x:%x (%d:%d:%d)\n", brand, max_level,
  165 + family, model, stepping, family, model, stepping);
  166 +
  167 + if (!(edx & (1 << 5))) {
  168 + if (verbose)
  169 + printf("CPUID: no MSR\n");
  170 + exit(1);
  171 + }
  172 +
  173 + /*
  174 + * Support for MSR_IA32_ENERGY_PERF_BIAS
  175 + * is indicated by CPUID.06H.ECX.bit3
  176 + */
  177 + asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (6));
  178 + if (verbose)
  179 + printf("CPUID.06H.ECX: 0x%x\n", ecx);
  180 + if (!(ecx & (1 << 3))) {
  181 + if (verbose)
  182 + printf("CPUID: No MSR_IA32_ENERGY_PERF_BIAS\n");
  183 + exit(1);
  184 + }
  185 + return; /* success */
  186 +}
  187 +
  188 +unsigned long long get_msr(int cpu, int offset)
  189 +{
  190 + unsigned long long msr;
  191 + char msr_path[32];
  192 + int retval;
  193 + int fd;
  194 +
  195 + sprintf(msr_path, "/dev/cpu/%d/msr", cpu);
  196 + fd = open(msr_path, O_RDONLY);
  197 + if (fd < 0) {
  198 + printf("Try \"# modprobe msr\"\n");
  199 + perror(msr_path);
  200 + exit(1);
  201 + }
  202 +
  203 + retval = pread(fd, &msr, sizeof msr, offset);
  204 +
  205 + if (retval != sizeof msr) {
  206 + printf("pread cpu%d 0x%x = %d\n", cpu, offset, retval);
  207 + exit(-2);
  208 + }
  209 + close(fd);
  210 + return msr;
  211 +}
  212 +
  213 +unsigned long long put_msr(int cpu, unsigned long long new_msr, int offset)
  214 +{
  215 + unsigned long long old_msr;
  216 + char msr_path[32];
  217 + int retval;
  218 + int fd;
  219 +
  220 + sprintf(msr_path, "/dev/cpu/%d/msr", cpu);
  221 + fd = open(msr_path, O_RDWR);
  222 + if (fd < 0) {
  223 + perror(msr_path);
  224 + exit(1);
  225 + }
  226 +
  227 + retval = pread(fd, &old_msr, sizeof old_msr, offset);
  228 + if (retval != sizeof old_msr) {
  229 + perror("pwrite");
  230 + printf("pread cpu%d 0x%x = %d\n", cpu, offset, retval);
  231 + exit(-2);
  232 + }
  233 +
  234 + retval = pwrite(fd, &new_msr, sizeof new_msr, offset);
  235 + if (retval != sizeof new_msr) {
  236 + perror("pwrite");
  237 + printf("pwrite cpu%d 0x%x = %d\n", cpu, offset, retval);
  238 + exit(-2);
  239 + }
  240 +
  241 + close(fd);
  242 +
  243 + return old_msr;
  244 +}
  245 +
  246 +void print_msr(int cpu)
  247 +{
  248 + printf("cpu%d: 0x%016llx\n",
  249 + cpu, get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS));
  250 +}
  251 +
  252 +void update_msr(int cpu)
  253 +{
  254 + unsigned long long previous_msr;
  255 +
  256 + previous_msr = put_msr(cpu, new_bias, MSR_IA32_ENERGY_PERF_BIAS);
  257 +
  258 + if (verbose)
  259 + printf("cpu%d msr0x%x 0x%016llx -> 0x%016llx\n",
  260 + cpu, MSR_IA32_ENERGY_PERF_BIAS, previous_msr, new_bias);
  261 +
  262 + return;
  263 +}
  264 +
  265 +char *proc_stat = "/proc/stat";
  266 +/*
  267 + * run func() on every cpu in /dev/cpu
  268 + */
  269 +void for_every_cpu(void (func)(int))
  270 +{
  271 + FILE *fp;
  272 + int retval;
  273 +
  274 + fp = fopen(proc_stat, "r");
  275 + if (fp == NULL) {
  276 + perror(proc_stat);
  277 + exit(1);
  278 + }
  279 +
  280 + retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
  281 + if (retval != 0) {
  282 + perror("/proc/stat format");
  283 + exit(1);
  284 + }
  285 +
  286 + while (1) {
  287 + int cpu;
  288 +
  289 + retval = fscanf(fp,
  290 + "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n",
  291 + &cpu);
  292 + if (retval != 1)
  293 + return;
  294 +
  295 + func(cpu);
  296 + }
  297 + fclose(fp);
  298 +}
  299 +
  300 +int main(int argc, char **argv)
  301 +{
  302 + cmdline(argc, argv);
  303 +
  304 + if (verbose > 1)
  305 + printf("x86_energy_perf_policy Nov 24, 2010"
  306 + " - Len Brown <lenb@kernel.org>\n");
  307 + if (verbose > 1 && !read_only)
  308 + printf("new_bias %lld\n", new_bias);
  309 +
  310 + validate_cpuid();
  311 +
  312 + if (cpu != -1) {
  313 + if (read_only)
  314 + print_msr(cpu);
  315 + else
  316 + update_msr(cpu);
  317 + } else {
  318 + if (read_only)
  319 + for_every_cpu(print_msr);
  320 + else
  321 + for_every_cpu(update_msr);
  322 + }
  323 +
  324 + return 0;
  325 +}