Commit c4ee841f602e5eef8eab673295c49c5b49d7732b

Authored by Dirk Brandewie
Committed by Rafael J. Wysocki
1 parent f0fe3cd7e1

intel_pstate: add sample time scaling

The PID assumes that samples are of equal time, which is not true for
deferrable timers when the system goes idle.  This causes the
PID to take a long time to converge to the min P state and, depending
on the pattern of the idle load, can make the P state appear stuck.

The hold-off value of three sample times before using the scaling is
to give a grace period for applications that have high performance
requirements and spend a lot of time idle.  The poster child for this
behavior is the ffmpeg benchmark in the Phoronix test suite.

Cc: 3.14+ <stable@vger.kernel.org> # 3.14+
Signed-off-by: Dirk Brandewie <dirk.j.brandewie@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

Showing 1 changed file with 17 additions and 1 deletions Side-by-side Diff

drivers/cpufreq/intel_pstate.c
... ... @@ -60,6 +60,7 @@
60 60 u64 aperf;
61 61 u64 mperf;
62 62 int freq;
  63 + ktime_t time;
63 64 };
64 65  
65 66 struct pstate_data {
... ... @@ -97,6 +98,7 @@
97 98 struct vid_data vid;
98 99 struct _pid pid;
99 100  
  101 + ktime_t last_sample_time;
100 102 u64 prev_aperf;
101 103 u64 prev_mperf;
102 104 struct sample sample;
... ... @@ -583,6 +585,8 @@
583 585 aperf = aperf >> FRAC_BITS;
584 586 mperf = mperf >> FRAC_BITS;
585 587  
  588 + cpu->last_sample_time = cpu->sample.time;
  589 + cpu->sample.time = ktime_get();
586 590 cpu->sample.aperf = aperf;
587 591 cpu->sample.mperf = mperf;
588 592 cpu->sample.aperf -= cpu->prev_aperf;
589 593  
... ... @@ -605,12 +609,24 @@
605 609  
606 610 static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu)
607 611 {
608   - int32_t core_busy, max_pstate, current_pstate;
  612 + int32_t core_busy, max_pstate, current_pstate, sample_ratio;
  613 + u32 duration_us;
  614 + u32 sample_time;
609 615  
610 616 core_busy = cpu->sample.core_pct_busy;
611 617 max_pstate = int_tofp(cpu->pstate.max_pstate);
612 618 current_pstate = int_tofp(cpu->pstate.current_pstate);
613 619 core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));
  620 +
  621 + sample_time = (pid_params.sample_rate_ms * USEC_PER_MSEC);
  622 + duration_us = (u32) ktime_us_delta(cpu->sample.time,
  623 + cpu->last_sample_time);
  624 + if (duration_us > sample_time * 3) {
  625 + sample_ratio = div_fp(int_tofp(sample_time),
  626 + int_tofp(duration_us));
  627 + core_busy = mul_fp(core_busy, sample_ratio);
  628 + }
  629 +
614 630 return core_busy;
615 631 }
616 632