Commit 6548be50135e2a406c767cfef246b7d5a1c1cc66
Exists in
ti-linux-3.15.y
and in
1 other branch
Merge branch 'master' of http://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux into ti-linux-3.15.y

* 'master' of http://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux:
  intel_pstate: Improve initial busy calculation
  intel_pstate: add sample time scaling
  intel_pstate: Correct rounding in busy calculation
  intel_pstate: Remove C0 tracking
  drm/radeon: use the CP DMA on CIK
  drm/radeon: sync page table updates
  drm/radeon: fix vm buffer size estimation
  drm/crtc-helper: skip locking checks in panicking path
  drm/radeon/dpm: resume fixes for some systems

Signed-off-by: Dan Murphy <DMurphy@ti.com>
Showing 7 changed files Side-by-side Diff
drivers/cpufreq/intel_pstate.c
... | ... | @@ -40,11 +40,11 @@ |
40 | 40 | #define BYT_TURBO_VIDS 0x66d |
41 | 41 | |
42 | 42 | |
43 | -#define FRAC_BITS 6 | |
43 | +#define FRAC_BITS 8 | |
44 | 44 | #define int_tofp(X) ((int64_t)(X) << FRAC_BITS) |
45 | 45 | #define fp_toint(X) ((X) >> FRAC_BITS) |
46 | -#define FP_ROUNDUP(X) ((X) += 1 << FRAC_BITS) | |
47 | 46 | |
47 | + | |
48 | 48 | static inline int32_t mul_fp(int32_t x, int32_t y) |
49 | 49 | { |
50 | 50 | return ((int64_t)x * (int64_t)y) >> FRAC_BITS; |
51 | 51 | |
... | ... | @@ -59,8 +59,8 @@ |
59 | 59 | int32_t core_pct_busy; |
60 | 60 | u64 aperf; |
61 | 61 | u64 mperf; |
62 | - unsigned long long tsc; | |
63 | 62 | int freq; |
63 | + ktime_t time; | |
64 | 64 | }; |
65 | 65 | |
66 | 66 | struct pstate_data { |
67 | 67 | |
... | ... | @@ -98,9 +98,9 @@ |
98 | 98 | struct vid_data vid; |
99 | 99 | struct _pid pid; |
100 | 100 | |
101 | + ktime_t last_sample_time; | |
101 | 102 | u64 prev_aperf; |
102 | 103 | u64 prev_mperf; |
103 | - unsigned long long prev_tsc; | |
104 | 104 | struct sample sample; |
105 | 105 | }; |
106 | 106 | |
... | ... | @@ -200,7 +200,10 @@ |
200 | 200 | pid->last_err = fp_error; |
201 | 201 | |
202 | 202 | result = pterm + mul_fp(pid->integral, pid->i_gain) + dterm; |
203 | - | |
203 | + if (result >= 0) | |
204 | + result = result + (1 << (FRAC_BITS-1)); | |
205 | + else | |
206 | + result = result - (1 << (FRAC_BITS-1)); | |
204 | 207 | return (signed int)fp_toint(result); |
205 | 208 | } |
206 | 209 | |
207 | 210 | |
208 | 211 | |
209 | 212 | |
210 | 213 | |
211 | 214 | |
212 | 215 | |
213 | 216 | |
214 | 217 | |
215 | 218 | |
216 | 219 | |
... | ... | @@ -560,47 +563,42 @@ |
560 | 563 | static inline void intel_pstate_calc_busy(struct cpudata *cpu, |
561 | 564 | struct sample *sample) |
562 | 565 | { |
563 | - int32_t core_pct; | |
564 | - int32_t c0_pct; | |
566 | + int64_t core_pct; | |
567 | + int32_t rem; | |
565 | 568 | |
566 | - core_pct = div_fp(int_tofp((sample->aperf)), | |
567 | - int_tofp((sample->mperf))); | |
568 | - core_pct = mul_fp(core_pct, int_tofp(100)); | |
569 | - FP_ROUNDUP(core_pct); | |
569 | + core_pct = int_tofp(sample->aperf) * int_tofp(100); | |
570 | + core_pct = div_u64_rem(core_pct, int_tofp(sample->mperf), &rem); | |
570 | 571 | |
571 | - c0_pct = div_fp(int_tofp(sample->mperf), int_tofp(sample->tsc)); | |
572 | + if ((rem << 1) >= int_tofp(sample->mperf)) | |
573 | + core_pct += 1; | |
572 | 574 | |
573 | 575 | sample->freq = fp_toint( |
574 | 576 | mul_fp(int_tofp(cpu->pstate.max_pstate * 1000), core_pct)); |
575 | 577 | |
576 | - sample->core_pct_busy = mul_fp(core_pct, c0_pct); | |
578 | + sample->core_pct_busy = (int32_t)core_pct; | |
577 | 579 | } |
578 | 580 | |
579 | 581 | static inline void intel_pstate_sample(struct cpudata *cpu) |
580 | 582 | { |
581 | 583 | u64 aperf, mperf; |
582 | - unsigned long long tsc; | |
583 | 584 | |
584 | 585 | rdmsrl(MSR_IA32_APERF, aperf); |
585 | 586 | rdmsrl(MSR_IA32_MPERF, mperf); |
586 | - tsc = native_read_tsc(); | |
587 | 587 | |
588 | 588 | aperf = aperf >> FRAC_BITS; |
589 | 589 | mperf = mperf >> FRAC_BITS; |
590 | - tsc = tsc >> FRAC_BITS; | |
591 | 590 | |
591 | + cpu->last_sample_time = cpu->sample.time; | |
592 | + cpu->sample.time = ktime_get(); | |
592 | 593 | cpu->sample.aperf = aperf; |
593 | 594 | cpu->sample.mperf = mperf; |
594 | - cpu->sample.tsc = tsc; | |
595 | 595 | cpu->sample.aperf -= cpu->prev_aperf; |
596 | 596 | cpu->sample.mperf -= cpu->prev_mperf; |
597 | - cpu->sample.tsc -= cpu->prev_tsc; | |
598 | 597 | |
599 | 598 | intel_pstate_calc_busy(cpu, &cpu->sample); |
600 | 599 | |
601 | 600 | cpu->prev_aperf = aperf; |
602 | 601 | cpu->prev_mperf = mperf; |
603 | - cpu->prev_tsc = tsc; | |
604 | 602 | } |
605 | 603 | |
606 | 604 | static inline void intel_pstate_set_sample_time(struct cpudata *cpu) |
607 | 605 | |
... | ... | @@ -614,13 +612,25 @@ |
614 | 612 | |
615 | 613 | static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu) |
616 | 614 | { |
617 | - int32_t core_busy, max_pstate, current_pstate; | |
615 | + int32_t core_busy, max_pstate, current_pstate, sample_ratio; | |
616 | + u32 duration_us; | |
617 | + u32 sample_time; | |
618 | 618 | |
619 | 619 | core_busy = cpu->sample.core_pct_busy; |
620 | 620 | max_pstate = int_tofp(cpu->pstate.max_pstate); |
621 | 621 | current_pstate = int_tofp(cpu->pstate.current_pstate); |
622 | 622 | core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate)); |
623 | - return FP_ROUNDUP(core_busy); | |
623 | + | |
624 | + sample_time = (pid_params.sample_rate_ms * USEC_PER_MSEC); | |
625 | + duration_us = (u32) ktime_us_delta(cpu->sample.time, | |
626 | + cpu->last_sample_time); | |
627 | + if (duration_us > sample_time * 3) { | |
628 | + sample_ratio = div_fp(int_tofp(sample_time), | |
629 | + int_tofp(duration_us)); | |
630 | + core_busy = mul_fp(core_busy, sample_ratio); | |
631 | + } | |
632 | + | |
633 | + return core_busy; | |
624 | 634 | } |
625 | 635 | |
626 | 636 | static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) |
drivers/gpu/drm/drm_crtc_helper.c
... | ... | @@ -29,6 +29,7 @@ |
29 | 29 | * Jesse Barnes <jesse.barnes@intel.com> |
30 | 30 | */ |
31 | 31 | |
32 | +#include <linux/kernel.h> | |
32 | 33 | #include <linux/export.h> |
33 | 34 | #include <linux/moduleparam.h> |
34 | 35 | |
... | ... | @@ -88,7 +89,13 @@ |
88 | 89 | struct drm_connector *connector; |
89 | 90 | struct drm_device *dev = encoder->dev; |
90 | 91 | |
91 | - WARN_ON(!mutex_is_locked(&dev->mode_config.mutex)); | |
92 | + /* | |
93 | + * We can expect this mutex to be locked if we are not panicking. | |
94 | + * Locking is currently fubar in the panic handler. | |
95 | + */ | |
96 | + if (!oops_in_progress) | |
97 | + WARN_ON(!mutex_is_locked(&dev->mode_config.mutex)); | |
98 | + | |
92 | 99 | list_for_each_entry(connector, &dev->mode_config.connector_list, head) |
93 | 100 | if (connector->encoder == encoder) |
94 | 101 | return true; |
... | ... | @@ -112,7 +119,13 @@ |
112 | 119 | struct drm_encoder *encoder; |
113 | 120 | struct drm_device *dev = crtc->dev; |
114 | 121 | |
115 | - WARN_ON(!mutex_is_locked(&dev->mode_config.mutex)); | |
122 | + /* | |
123 | + * We can expect this mutex to be locked if we are not panicking. | |
124 | + * Locking is currently fubar in the panic handler. | |
125 | + */ | |
126 | + if (!oops_in_progress) | |
127 | + WARN_ON(!mutex_is_locked(&dev->mode_config.mutex)); | |
128 | + | |
116 | 129 | list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) |
117 | 130 | if (encoder->crtc == crtc && drm_helper_encoder_in_use(encoder)) |
118 | 131 | return true; |
drivers/gpu/drm/radeon/atombios_crtc.c
... | ... | @@ -270,8 +270,6 @@ |
270 | 270 | switch (mode) { |
271 | 271 | case DRM_MODE_DPMS_ON: |
272 | 272 | radeon_crtc->enabled = true; |
273 | - /* adjust pm to dpms changes BEFORE enabling crtcs */ | |
274 | - radeon_pm_compute_clocks(rdev); | |
275 | 273 | atombios_enable_crtc(crtc, ATOM_ENABLE); |
276 | 274 | if (ASIC_IS_DCE3(rdev) && !ASIC_IS_DCE6(rdev)) |
277 | 275 | atombios_enable_crtc_memreq(crtc, ATOM_ENABLE); |
278 | 276 | |
... | ... | @@ -289,10 +287,10 @@ |
289 | 287 | atombios_enable_crtc_memreq(crtc, ATOM_DISABLE); |
290 | 288 | atombios_enable_crtc(crtc, ATOM_DISABLE); |
291 | 289 | radeon_crtc->enabled = false; |
292 | - /* adjust pm to dpms changes AFTER disabling crtcs */ | |
293 | - radeon_pm_compute_clocks(rdev); | |
294 | 290 | break; |
295 | 291 | } |
292 | + /* adjust pm to dpms */ | |
293 | + radeon_pm_compute_clocks(rdev); | |
296 | 294 | } |
297 | 295 | |
298 | 296 | static void |
drivers/gpu/drm/radeon/radeon_asic.c
... | ... | @@ -2049,8 +2049,8 @@ |
2049 | 2049 | .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, |
2050 | 2050 | .dma = &cik_copy_dma, |
2051 | 2051 | .dma_ring_index = R600_RING_TYPE_DMA_INDEX, |
2052 | - .copy = &cik_copy_dma, | |
2053 | - .copy_ring_index = R600_RING_TYPE_DMA_INDEX, | |
2052 | + .copy = &cik_copy_cpdma, | |
2053 | + .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, | |
2054 | 2054 | }, |
2055 | 2055 | .surface = { |
2056 | 2056 | .set_reg = r600_set_surface_reg, |
drivers/gpu/drm/radeon/radeon_device.c
... | ... | @@ -1558,6 +1558,10 @@ |
1558 | 1558 | |
1559 | 1559 | drm_kms_helper_poll_enable(dev); |
1560 | 1560 | |
1561 | + /* set the power state here in case we are a PX system or headless */ | |
1562 | + if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) | |
1563 | + radeon_pm_compute_clocks(rdev); | |
1564 | + | |
1561 | 1565 | if (fbcon) { |
1562 | 1566 | radeon_fbdev_set_suspend(rdev, 0); |
1563 | 1567 | console_unlock(); |
drivers/gpu/drm/radeon/radeon_pm.c
drivers/gpu/drm/radeon/radeon_vm.c
... | ... | @@ -132,7 +132,7 @@ |
132 | 132 | struct radeon_cs_reloc *list; |
133 | 133 | unsigned i, idx; |
134 | 134 | |
135 | - list = kmalloc_array(vm->max_pde_used + 1, | |
135 | + list = kmalloc_array(vm->max_pde_used + 2, | |
136 | 136 | sizeof(struct radeon_cs_reloc), GFP_KERNEL); |
137 | 137 | if (!list) |
138 | 138 | return NULL; |
... | ... | @@ -585,7 +585,8 @@ |
585 | 585 | { |
586 | 586 | static const uint32_t incr = RADEON_VM_PTE_COUNT * 8; |
587 | 587 | |
588 | - uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory); | |
588 | + struct radeon_bo *pd = vm->page_directory; | |
589 | + uint64_t pd_addr = radeon_bo_gpu_offset(pd); | |
589 | 590 | uint64_t last_pde = ~0, last_pt = ~0; |
590 | 591 | unsigned count = 0, pt_idx, ndw; |
591 | 592 | struct radeon_ib ib; |
... | ... | @@ -642,6 +643,7 @@ |
642 | 643 | incr, R600_PTE_VALID); |
643 | 644 | |
644 | 645 | if (ib.length_dw != 0) { |
646 | + radeon_semaphore_sync_to(ib.semaphore, pd->tbo.sync_obj); | |
645 | 647 | radeon_semaphore_sync_to(ib.semaphore, vm->last_id_use); |
646 | 648 | r = radeon_ib_schedule(rdev, &ib, NULL); |
647 | 649 | if (r) { |
648 | 650 | |
649 | 651 | |
... | ... | @@ -689,15 +691,18 @@ |
689 | 691 | /* walk over the address space and update the page tables */ |
690 | 692 | for (addr = start; addr < end; ) { |
691 | 693 | uint64_t pt_idx = addr >> RADEON_VM_BLOCK_SIZE; |
694 | + struct radeon_bo *pt = vm->page_tables[pt_idx].bo; | |
692 | 695 | unsigned nptes; |
693 | 696 | uint64_t pte; |
694 | 697 | |
698 | + radeon_semaphore_sync_to(ib->semaphore, pt->tbo.sync_obj); | |
699 | + | |
695 | 700 | if ((addr & ~mask) == (end & ~mask)) |
696 | 701 | nptes = end - addr; |
697 | 702 | else |
698 | 703 | nptes = RADEON_VM_PTE_COUNT - (addr & mask); |
699 | 704 | |
700 | - pte = radeon_bo_gpu_offset(vm->page_tables[pt_idx].bo); | |
705 | + pte = radeon_bo_gpu_offset(pt); | |
701 | 706 | pte += (addr & mask) * 8; |
702 | 707 | |
703 | 708 | if ((last_pte + 8 * count) != pte) { |