Commit c4ee841f602e5eef8eab673295c49c5b49d7732b
Committed by
Rafael J. Wysocki
1 parent
f0fe3cd7e1
Exists in
ti-lsk-linux-4.1.y
and in
12 other branches
intel_pstate: add sample time scaling
The PID assumes that samples are taken at equal time intervals, which is not true for deferrable timers when the system goes idle. This causes the PID to take a long time to converge to the min P state and, depending on the pattern of the idle load, can make the P state appear stuck. The hold-off value of three sample times before using the scaling is to give a grace period for applications that have high performance requirements and spend a lot of time idle. The poster child for this behavior is the ffmpeg benchmark in the Phoronix test suite. Cc: 3.14+ <stable@vger.kernel.org> # 3.14+ Signed-off-by: Dirk Brandewie <dirk.j.brandewie@intel.com> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Showing 1 changed file with 17 additions and 1 deletions Side-by-side Diff
drivers/cpufreq/intel_pstate.c
... | ... | @@ -60,6 +60,7 @@ |
60 | 60 | u64 aperf; |
61 | 61 | u64 mperf; |
62 | 62 | int freq; |
63 | + ktime_t time; | |
63 | 64 | }; |
64 | 65 | |
65 | 66 | struct pstate_data { |
... | ... | @@ -97,6 +98,7 @@ |
97 | 98 | struct vid_data vid; |
98 | 99 | struct _pid pid; |
99 | 100 | |
101 | + ktime_t last_sample_time; | |
100 | 102 | u64 prev_aperf; |
101 | 103 | u64 prev_mperf; |
102 | 104 | struct sample sample; |
... | ... | @@ -583,6 +585,8 @@ |
583 | 585 | aperf = aperf >> FRAC_BITS; |
584 | 586 | mperf = mperf >> FRAC_BITS; |
585 | 587 | |
588 | + cpu->last_sample_time = cpu->sample.time; | |
589 | + cpu->sample.time = ktime_get(); | |
586 | 590 | cpu->sample.aperf = aperf; |
587 | 591 | cpu->sample.mperf = mperf; |
588 | 592 | cpu->sample.aperf -= cpu->prev_aperf; |
589 | 593 | |
... | ... | @@ -605,12 +609,24 @@ |
605 | 609 | |
606 | 610 | static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu) |
607 | 611 | { |
608 | - int32_t core_busy, max_pstate, current_pstate; | |
612 | + int32_t core_busy, max_pstate, current_pstate, sample_ratio; | |
613 | + u32 duration_us; | |
614 | + u32 sample_time; | |
609 | 615 | |
610 | 616 | core_busy = cpu->sample.core_pct_busy; |
611 | 617 | max_pstate = int_tofp(cpu->pstate.max_pstate); |
612 | 618 | current_pstate = int_tofp(cpu->pstate.current_pstate); |
613 | 619 | core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate)); |
620 | + | |
621 | + sample_time = (pid_params.sample_rate_ms * USEC_PER_MSEC); | |
622 | + duration_us = (u32) ktime_us_delta(cpu->sample.time, | |
623 | + cpu->last_sample_time); | |
624 | + if (duration_us > sample_time * 3) { | |
625 | + sample_ratio = div_fp(int_tofp(sample_time), | |
626 | + int_tofp(duration_us)); | |
627 | + core_busy = mul_fp(core_busy, sample_ratio); | |
628 | + } | |
629 | + | |
614 | 630 | return core_busy; |
615 | 631 | } |
616 | 632 |