Commit c717d1561493c58d030405c7e30e35459db31912
Exists in
ti-lsk-linux-4.1.y
and in
12 other branches
Merge tag 'pm-3.15-final' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull intel pstate fixes from Rafael Wysocki: "Final power management fixes for 3.15 - Taking non-idle time into account when calculating core busy time was a mistake and led to a performance regression. Since the problem it was supposed to address is now taken care of in a different way, we don't need to do it any more, so drop the non-idle time tracking from intel_pstate. Dirk Brandewie. - Changing to fixed point math throughout the busy calculation introduced rounding errors that adversely affect the accuracy of intel_pstate's computations. Fix from Dirk Brandewie. - The PID controller algorithm used by intel_pstate assumes that the time interval between two adjacent samples will always be the same which is not the case for deferrable timers (used by intel_pstate) when the system is idle. This leads to inaccurate predictions and artificially increases convergence times for the minimum P-state. Fix from Dirk Brandewie. - intel_pstate carries out computations using 32-bit variables that may overflow for large enough values of APERF/MPERF. Switch to using 64-bit variables for computations, from Doug Smythies" * tag 'pm-3.15-final' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm: intel_pstate: Improve initial busy calculation intel_pstate: add sample time scaling intel_pstate: Correct rounding in busy calculation intel_pstate: Remove C0 tracking
Showing 1 changed file Side-by-side Diff
drivers/cpufreq/intel_pstate.c
... | ... | @@ -40,11 +40,11 @@ |
40 | 40 | #define BYT_TURBO_VIDS 0x66d |
41 | 41 | |
42 | 42 | |
43 | -#define FRAC_BITS 6 | |
43 | +#define FRAC_BITS 8 | |
44 | 44 | #define int_tofp(X) ((int64_t)(X) << FRAC_BITS) |
45 | 45 | #define fp_toint(X) ((X) >> FRAC_BITS) |
46 | -#define FP_ROUNDUP(X) ((X) += 1 << FRAC_BITS) | |
47 | 46 | |
47 | + | |
48 | 48 | static inline int32_t mul_fp(int32_t x, int32_t y) |
49 | 49 | { |
50 | 50 | return ((int64_t)x * (int64_t)y) >> FRAC_BITS; |
51 | 51 | |
... | ... | @@ -59,8 +59,8 @@ |
59 | 59 | int32_t core_pct_busy; |
60 | 60 | u64 aperf; |
61 | 61 | u64 mperf; |
62 | - unsigned long long tsc; | |
63 | 62 | int freq; |
63 | + ktime_t time; | |
64 | 64 | }; |
65 | 65 | |
66 | 66 | struct pstate_data { |
67 | 67 | |
... | ... | @@ -98,9 +98,9 @@ |
98 | 98 | struct vid_data vid; |
99 | 99 | struct _pid pid; |
100 | 100 | |
101 | + ktime_t last_sample_time; | |
101 | 102 | u64 prev_aperf; |
102 | 103 | u64 prev_mperf; |
103 | - unsigned long long prev_tsc; | |
104 | 104 | struct sample sample; |
105 | 105 | }; |
106 | 106 | |
... | ... | @@ -200,7 +200,10 @@ |
200 | 200 | pid->last_err = fp_error; |
201 | 201 | |
202 | 202 | result = pterm + mul_fp(pid->integral, pid->i_gain) + dterm; |
203 | - | |
203 | + if (result >= 0) | |
204 | + result = result + (1 << (FRAC_BITS-1)); | |
205 | + else | |
206 | + result = result - (1 << (FRAC_BITS-1)); | |
204 | 207 | return (signed int)fp_toint(result); |
205 | 208 | } |
206 | 209 | |
207 | 210 | |
208 | 211 | |
209 | 212 | |
210 | 213 | |
211 | 214 | |
212 | 215 | |
213 | 216 | |
214 | 217 | |
215 | 218 | |
216 | 219 | |
... | ... | @@ -560,47 +563,42 @@ |
560 | 563 | static inline void intel_pstate_calc_busy(struct cpudata *cpu, |
561 | 564 | struct sample *sample) |
562 | 565 | { |
563 | - int32_t core_pct; | |
564 | - int32_t c0_pct; | |
566 | + int64_t core_pct; | |
567 | + int32_t rem; | |
565 | 568 | |
566 | - core_pct = div_fp(int_tofp((sample->aperf)), | |
567 | - int_tofp((sample->mperf))); | |
568 | - core_pct = mul_fp(core_pct, int_tofp(100)); | |
569 | - FP_ROUNDUP(core_pct); | |
569 | + core_pct = int_tofp(sample->aperf) * int_tofp(100); | |
570 | + core_pct = div_u64_rem(core_pct, int_tofp(sample->mperf), &rem); | |
570 | 571 | |
571 | - c0_pct = div_fp(int_tofp(sample->mperf), int_tofp(sample->tsc)); | |
572 | + if ((rem << 1) >= int_tofp(sample->mperf)) | |
573 | + core_pct += 1; | |
572 | 574 | |
573 | 575 | sample->freq = fp_toint( |
574 | 576 | mul_fp(int_tofp(cpu->pstate.max_pstate * 1000), core_pct)); |
575 | 577 | |
576 | - sample->core_pct_busy = mul_fp(core_pct, c0_pct); | |
578 | + sample->core_pct_busy = (int32_t)core_pct; | |
577 | 579 | } |
578 | 580 | |
579 | 581 | static inline void intel_pstate_sample(struct cpudata *cpu) |
580 | 582 | { |
581 | 583 | u64 aperf, mperf; |
582 | - unsigned long long tsc; | |
583 | 584 | |
584 | 585 | rdmsrl(MSR_IA32_APERF, aperf); |
585 | 586 | rdmsrl(MSR_IA32_MPERF, mperf); |
586 | - tsc = native_read_tsc(); | |
587 | 587 | |
588 | 588 | aperf = aperf >> FRAC_BITS; |
589 | 589 | mperf = mperf >> FRAC_BITS; |
590 | - tsc = tsc >> FRAC_BITS; | |
591 | 590 | |
591 | + cpu->last_sample_time = cpu->sample.time; | |
592 | + cpu->sample.time = ktime_get(); | |
592 | 593 | cpu->sample.aperf = aperf; |
593 | 594 | cpu->sample.mperf = mperf; |
594 | - cpu->sample.tsc = tsc; | |
595 | 595 | cpu->sample.aperf -= cpu->prev_aperf; |
596 | 596 | cpu->sample.mperf -= cpu->prev_mperf; |
597 | - cpu->sample.tsc -= cpu->prev_tsc; | |
598 | 597 | |
599 | 598 | intel_pstate_calc_busy(cpu, &cpu->sample); |
600 | 599 | |
601 | 600 | cpu->prev_aperf = aperf; |
602 | 601 | cpu->prev_mperf = mperf; |
603 | - cpu->prev_tsc = tsc; | |
604 | 602 | } |
605 | 603 | |
606 | 604 | static inline void intel_pstate_set_sample_time(struct cpudata *cpu) |
607 | 605 | |
... | ... | @@ -614,13 +612,25 @@ |
614 | 612 | |
615 | 613 | static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu) |
616 | 614 | { |
617 | - int32_t core_busy, max_pstate, current_pstate; | |
615 | + int32_t core_busy, max_pstate, current_pstate, sample_ratio; | |
616 | + u32 duration_us; | |
617 | + u32 sample_time; | |
618 | 618 | |
619 | 619 | core_busy = cpu->sample.core_pct_busy; |
620 | 620 | max_pstate = int_tofp(cpu->pstate.max_pstate); |
621 | 621 | current_pstate = int_tofp(cpu->pstate.current_pstate); |
622 | 622 | core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate)); |
623 | - return FP_ROUNDUP(core_busy); | |
623 | + | |
624 | + sample_time = (pid_params.sample_rate_ms * USEC_PER_MSEC); | |
625 | + duration_us = (u32) ktime_us_delta(cpu->sample.time, | |
626 | + cpu->last_sample_time); | |
627 | + if (duration_us > sample_time * 3) { | |
628 | + sample_ratio = div_fp(int_tofp(sample_time), | |
629 | + int_tofp(duration_us)); | |
630 | + core_busy = mul_fp(core_busy, sample_ratio); | |
631 | + } | |
632 | + | |
633 | + return core_busy; | |
624 | 634 | } |
625 | 635 | |
626 | 636 | static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) |