Commit 6548be50135e2a406c767cfef246b7d5a1c1cc66

Authored by Dan Murphy

Merge branch 'master' of http://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux into ti-linux-3.15.y

* 'master' of http://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux:
  intel_pstate: Improve initial busy calculation
  intel_pstate: add sample time scaling
  intel_pstate: Correct rounding in busy calculation
  intel_pstate: Remove C0 tracking
  drm/radeon: use the CP DMA on CIK
  drm/radeon: sync page table updates
  drm/radeon: fix vm buffer size estimation
  drm/crtc-helper: skip locking checks in panicking path
  drm/radeon/dpm: resume fixes for some systems

Signed-off-by: Dan Murphy <DMurphy@ti.com>

Showing 7 changed files Side-by-side Diff

drivers/cpufreq/intel_pstate.c
... ... @@ -40,11 +40,11 @@
40 40 #define BYT_TURBO_VIDS 0x66d
41 41  
42 42  
43   -#define FRAC_BITS 6
  43 +#define FRAC_BITS 8
44 44 #define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
45 45 #define fp_toint(X) ((X) >> FRAC_BITS)
46   -#define FP_ROUNDUP(X) ((X) += 1 << FRAC_BITS)
47 46  
  47 +
48 48 static inline int32_t mul_fp(int32_t x, int32_t y)
49 49 {
50 50 return ((int64_t)x * (int64_t)y) >> FRAC_BITS;
51 51  
... ... @@ -59,8 +59,8 @@
59 59 int32_t core_pct_busy;
60 60 u64 aperf;
61 61 u64 mperf;
62   - unsigned long long tsc;
63 62 int freq;
  63 + ktime_t time;
64 64 };
65 65  
66 66 struct pstate_data {
67 67  
... ... @@ -98,9 +98,9 @@
98 98 struct vid_data vid;
99 99 struct _pid pid;
100 100  
  101 + ktime_t last_sample_time;
101 102 u64 prev_aperf;
102 103 u64 prev_mperf;
103   - unsigned long long prev_tsc;
104 104 struct sample sample;
105 105 };
106 106  
... ... @@ -200,7 +200,10 @@
200 200 pid->last_err = fp_error;
201 201  
202 202 result = pterm + mul_fp(pid->integral, pid->i_gain) + dterm;
203   -
  203 + if (result >= 0)
  204 + result = result + (1 << (FRAC_BITS-1));
  205 + else
  206 + result = result - (1 << (FRAC_BITS-1));
204 207 return (signed int)fp_toint(result);
205 208 }
206 209  
207 210  
208 211  
209 212  
210 213  
211 214  
212 215  
213 216  
214 217  
215 218  
216 219  
... ... @@ -560,47 +563,42 @@
560 563 static inline void intel_pstate_calc_busy(struct cpudata *cpu,
561 564 struct sample *sample)
562 565 {
563   - int32_t core_pct;
564   - int32_t c0_pct;
  566 + int64_t core_pct;
  567 + int32_t rem;
565 568  
566   - core_pct = div_fp(int_tofp((sample->aperf)),
567   - int_tofp((sample->mperf)));
568   - core_pct = mul_fp(core_pct, int_tofp(100));
569   - FP_ROUNDUP(core_pct);
  569 + core_pct = int_tofp(sample->aperf) * int_tofp(100);
  570 + core_pct = div_u64_rem(core_pct, int_tofp(sample->mperf), &rem);
570 571  
571   - c0_pct = div_fp(int_tofp(sample->mperf), int_tofp(sample->tsc));
  572 + if ((rem << 1) >= int_tofp(sample->mperf))
  573 + core_pct += 1;
572 574  
573 575 sample->freq = fp_toint(
574 576 mul_fp(int_tofp(cpu->pstate.max_pstate * 1000), core_pct));
575 577  
576   - sample->core_pct_busy = mul_fp(core_pct, c0_pct);
  578 + sample->core_pct_busy = (int32_t)core_pct;
577 579 }
578 580  
579 581 static inline void intel_pstate_sample(struct cpudata *cpu)
580 582 {
581 583 u64 aperf, mperf;
582   - unsigned long long tsc;
583 584  
584 585 rdmsrl(MSR_IA32_APERF, aperf);
585 586 rdmsrl(MSR_IA32_MPERF, mperf);
586   - tsc = native_read_tsc();
587 587  
588 588 aperf = aperf >> FRAC_BITS;
589 589 mperf = mperf >> FRAC_BITS;
590   - tsc = tsc >> FRAC_BITS;
591 590  
  591 + cpu->last_sample_time = cpu->sample.time;
  592 + cpu->sample.time = ktime_get();
592 593 cpu->sample.aperf = aperf;
593 594 cpu->sample.mperf = mperf;
594   - cpu->sample.tsc = tsc;
595 595 cpu->sample.aperf -= cpu->prev_aperf;
596 596 cpu->sample.mperf -= cpu->prev_mperf;
597   - cpu->sample.tsc -= cpu->prev_tsc;
598 597  
599 598 intel_pstate_calc_busy(cpu, &cpu->sample);
600 599  
601 600 cpu->prev_aperf = aperf;
602 601 cpu->prev_mperf = mperf;
603   - cpu->prev_tsc = tsc;
604 602 }
605 603  
606 604 static inline void intel_pstate_set_sample_time(struct cpudata *cpu)
607 605  
... ... @@ -614,13 +612,25 @@
614 612  
615 613 static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu)
616 614 {
617   - int32_t core_busy, max_pstate, current_pstate;
  615 + int32_t core_busy, max_pstate, current_pstate, sample_ratio;
  616 + u32 duration_us;
  617 + u32 sample_time;
618 618  
619 619 core_busy = cpu->sample.core_pct_busy;
620 620 max_pstate = int_tofp(cpu->pstate.max_pstate);
621 621 current_pstate = int_tofp(cpu->pstate.current_pstate);
622 622 core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));
623   - return FP_ROUNDUP(core_busy);
  623 +
  624 + sample_time = (pid_params.sample_rate_ms * USEC_PER_MSEC);
  625 + duration_us = (u32) ktime_us_delta(cpu->sample.time,
  626 + cpu->last_sample_time);
  627 + if (duration_us > sample_time * 3) {
  628 + sample_ratio = div_fp(int_tofp(sample_time),
  629 + int_tofp(duration_us));
  630 + core_busy = mul_fp(core_busy, sample_ratio);
  631 + }
  632 +
  633 + return core_busy;
624 634 }
625 635  
626 636 static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
drivers/gpu/drm/drm_crtc_helper.c
... ... @@ -29,6 +29,7 @@
29 29 * Jesse Barnes <jesse.barnes@intel.com>
30 30 */
31 31  
  32 +#include <linux/kernel.h>
32 33 #include <linux/export.h>
33 34 #include <linux/moduleparam.h>
34 35  
... ... @@ -88,7 +89,13 @@
88 89 struct drm_connector *connector;
89 90 struct drm_device *dev = encoder->dev;
90 91  
91   - WARN_ON(!mutex_is_locked(&dev->mode_config.mutex));
  92 + /*
  93 + * We can expect this mutex to be locked if we are not panicking.
  94 + * Locking is currently fubar in the panic handler.
  95 + */
  96 + if (!oops_in_progress)
  97 + WARN_ON(!mutex_is_locked(&dev->mode_config.mutex));
  98 +
92 99 list_for_each_entry(connector, &dev->mode_config.connector_list, head)
93 100 if (connector->encoder == encoder)
94 101 return true;
... ... @@ -112,7 +119,13 @@
112 119 struct drm_encoder *encoder;
113 120 struct drm_device *dev = crtc->dev;
114 121  
115   - WARN_ON(!mutex_is_locked(&dev->mode_config.mutex));
  122 + /*
  123 + * We can expect this mutex to be locked if we are not panicking.
  124 + * Locking is currently fubar in the panic handler.
  125 + */
  126 + if (!oops_in_progress)
  127 + WARN_ON(!mutex_is_locked(&dev->mode_config.mutex));
  128 +
116 129 list_for_each_entry(encoder, &dev->mode_config.encoder_list, head)
117 130 if (encoder->crtc == crtc && drm_helper_encoder_in_use(encoder))
118 131 return true;
drivers/gpu/drm/radeon/atombios_crtc.c
... ... @@ -270,8 +270,6 @@
270 270 switch (mode) {
271 271 case DRM_MODE_DPMS_ON:
272 272 radeon_crtc->enabled = true;
273   - /* adjust pm to dpms changes BEFORE enabling crtcs */
274   - radeon_pm_compute_clocks(rdev);
275 273 atombios_enable_crtc(crtc, ATOM_ENABLE);
276 274 if (ASIC_IS_DCE3(rdev) && !ASIC_IS_DCE6(rdev))
277 275 atombios_enable_crtc_memreq(crtc, ATOM_ENABLE);
278 276  
... ... @@ -289,10 +287,10 @@
289 287 atombios_enable_crtc_memreq(crtc, ATOM_DISABLE);
290 288 atombios_enable_crtc(crtc, ATOM_DISABLE);
291 289 radeon_crtc->enabled = false;
292   - /* adjust pm to dpms changes AFTER disabling crtcs */
293   - radeon_pm_compute_clocks(rdev);
294 290 break;
295 291 }
  292 + /* adjust pm to dpms */
  293 + radeon_pm_compute_clocks(rdev);
296 294 }
297 295  
298 296 static void
drivers/gpu/drm/radeon/radeon_asic.c
... ... @@ -2049,8 +2049,8 @@
2049 2049 .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
2050 2050 .dma = &cik_copy_dma,
2051 2051 .dma_ring_index = R600_RING_TYPE_DMA_INDEX,
2052   - .copy = &cik_copy_dma,
2053   - .copy_ring_index = R600_RING_TYPE_DMA_INDEX,
  2052 + .copy = &cik_copy_cpdma,
  2053 + .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
2054 2054 },
2055 2055 .surface = {
2056 2056 .set_reg = r600_set_surface_reg,
drivers/gpu/drm/radeon/radeon_device.c
... ... @@ -1558,6 +1558,10 @@
1558 1558  
1559 1559 drm_kms_helper_poll_enable(dev);
1560 1560  
  1561 + /* set the power state here in case we are a PX system or headless */
  1562 + if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled)
  1563 + radeon_pm_compute_clocks(rdev);
  1564 +
1561 1565 if (fbcon) {
1562 1566 radeon_fbdev_set_suspend(rdev, 0);
1563 1567 console_unlock();
drivers/gpu/drm/radeon/radeon_pm.c
... ... @@ -1104,7 +1104,6 @@
1104 1104 if (ret)
1105 1105 goto dpm_resume_fail;
1106 1106 rdev->pm.dpm_enabled = true;
1107   - radeon_pm_compute_clocks(rdev);
1108 1107 return;
1109 1108  
1110 1109 dpm_resume_fail:
drivers/gpu/drm/radeon/radeon_vm.c
... ... @@ -132,7 +132,7 @@
132 132 struct radeon_cs_reloc *list;
133 133 unsigned i, idx;
134 134  
135   - list = kmalloc_array(vm->max_pde_used + 1,
  135 + list = kmalloc_array(vm->max_pde_used + 2,
136 136 sizeof(struct radeon_cs_reloc), GFP_KERNEL);
137 137 if (!list)
138 138 return NULL;
... ... @@ -585,7 +585,8 @@
585 585 {
586 586 static const uint32_t incr = RADEON_VM_PTE_COUNT * 8;
587 587  
588   - uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory);
  588 + struct radeon_bo *pd = vm->page_directory;
  589 + uint64_t pd_addr = radeon_bo_gpu_offset(pd);
589 590 uint64_t last_pde = ~0, last_pt = ~0;
590 591 unsigned count = 0, pt_idx, ndw;
591 592 struct radeon_ib ib;
... ... @@ -642,6 +643,7 @@
642 643 incr, R600_PTE_VALID);
643 644  
644 645 if (ib.length_dw != 0) {
  646 + radeon_semaphore_sync_to(ib.semaphore, pd->tbo.sync_obj);
645 647 radeon_semaphore_sync_to(ib.semaphore, vm->last_id_use);
646 648 r = radeon_ib_schedule(rdev, &ib, NULL);
647 649 if (r) {
648 650  
649 651  
... ... @@ -689,15 +691,18 @@
689 691 /* walk over the address space and update the page tables */
690 692 for (addr = start; addr < end; ) {
691 693 uint64_t pt_idx = addr >> RADEON_VM_BLOCK_SIZE;
  694 + struct radeon_bo *pt = vm->page_tables[pt_idx].bo;
692 695 unsigned nptes;
693 696 uint64_t pte;
694 697  
  698 + radeon_semaphore_sync_to(ib->semaphore, pt->tbo.sync_obj);
  699 +
695 700 if ((addr & ~mask) == (end & ~mask))
696 701 nptes = end - addr;
697 702 else
698 703 nptes = RADEON_VM_PTE_COUNT - (addr & mask);
699 704  
700   - pte = radeon_bo_gpu_offset(vm->page_tables[pt_idx].bo);
  705 + pte = radeon_bo_gpu_offset(pt);
701 706 pte += (addr & mask) * 8;
702 707  
703 708 if ((last_pte + 8 * count) != pte) {